from __future__ import annotations

import contextlib
import math
import os
import sys
from collections.abc import Iterable, Sequence
from contextlib import nullcontext
from datetime import date, datetime, time, timedelta
from decimal import Decimal as PyDecimal
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Literal,
    NoReturn,
    Union,
    overload,
)

import polars._reexport as pl
from polars import functions as F
from polars._dependencies import (
    _ALTAIR_AVAILABLE,
    _PYARROW_AVAILABLE,
    _check_for_numpy,
    _check_for_pandas,
    _check_for_pyarrow,
    _check_for_torch,
    altair,
    import_optional,
    torch,
)
from polars._dependencies import numpy as np
from polars._dependencies import pandas as pd
from polars._dependencies import pyarrow as pa
from polars._utils.construction import (
    arrow_to_pyseries,
    dataframe_to_pyseries,
    iterable_to_pyseries,
    numpy_to_pyseries,
    pandas_to_pyseries,
    sequence_to_pyseries,
    series_to_pyseries,
)
from polars._utils.convert import (
    date_to_int,
    datetime_to_int,
    time_to_int,
    timedelta_to_int,
)
from polars._utils.deprecation import (
    deprecate_renamed_parameter,
    deprecated,
    issue_deprecation_warning,
)
from polars._utils.getitem import get_series_item_by_key
from polars._utils.unstable import unstable
from polars._utils.various import (
    BUILDING_SPHINX_DOCS,
    _is_generator,
    no_default,
    parse_version,
    qualified_type_name,
    require_same_type,
    scale_bytes,
    sphinx_accessor,
    warn_null_comparison,
)
from polars._utils.wrap import wrap_df, wrap_s
from polars.datatypes import (
    Array,
    Boolean,
    Categorical,
    Date,
    Datetime,
    Decimal,
    Duration,
    Enum,
    Float32,
    Float64,
    Int32,
    Int64,
    List,
    Null,
    Object,
    String,
    Time,
    UInt16,
    UInt32,
    UInt64,
    Unknown,
    is_polars_dtype,
    maybe_cast,
    numpy_char_code_to_dtype,
    parse_into_dtype,
    supported_numpy_char_code,
)
from polars.datatypes._utils import dtype_to_init_repr
from polars.exceptions import ComputeError, ModuleUpgradeRequiredError, ShapeError
from polars.interchange.protocol import CompatLevel
from polars.series.array import ArrayNameSpace
from polars.series.binary import BinaryNameSpace
from polars.series.categorical import CatNameSpace
from polars.series.datetime import DateTimeNameSpace
from polars.series.list import ListNameSpace
from polars.series.plotting import SeriesPlot
from polars.series.string import StringNameSpace
from polars.series.struct import StructNameSpace
from polars.series.utils import expr_dispatch, get_ffi_func

with contextlib.suppress(ImportError):  # Module not available when building docs
    from polars._plr import PyDataFrame, PySeries

if TYPE_CHECKING:
    with contextlib.suppress(ImportError):  # Module not available when building docs
        import polars._plr as plr

    from collections.abc import Collection, Generator, Mapping

    import jax
    import numpy.typing as npt

    from polars import DataFrame, DataType, Expr
    from polars._typing import (
        ArrowArrayExportable,
        ArrowStreamExportable,
        BufferInfo,
        ClosedInterval,
        ComparisonOperator,
        FillNullStrategy,
        InterpolationMethod,
        IntoExpr,
        IntoExprColumn,
        MultiIndexSelector,
        NonNestedLiteral,
        NullBehavior,
        NumericLiteral,
        PolarsDataType,
        PythonLiteral,
        QuantileMethod,
        RankMethod,
        RoundMode,
        SearchSortedSide,
        SeriesBuffers,
        SingleIndexSelector,
        SizeUnit,
        TemporalLiteral,
    )
    from polars._utils.various import NoDefault

    if sys.version_info >= (3, 11):
        from typing import Self
    else:
        from typing_extensions import Self

    if sys.version_info >= (3, 13):
        from warnings import deprecated
    else:
        from typing_extensions import deprecated  # noqa: TC004

elif BUILDING_SPHINX_DOCS:
    # note: we assign this way to work around an autocomplete issue in ipython/jedi
    # (ref: https://github.com/davidhalter/jedi/issues/2057)
    current_module = sys.modules[__name__]
    current_module.property = sphinx_accessor

ArrayLike = Union[
    Sequence[Any],
    "Series",
    "pa.Array",
    "pa.ChunkedArray",
    "np.ndarray[Any, Any]",
    "pd.Series[Any]",
    "pd.DatetimeIndex",
    "ArrowArrayExportable",
    "ArrowStreamExportable",
]


@expr_dispatch
class Series:
    """
    A Series represents a single column in a Polars DataFrame.

    Parameters
    ----------
    name : str, default None
        Name of the Series. Will be used as a column name when used in a DataFrame.
        When not specified, name is set to an empty string.
    values : ArrayLike, default None
        One-dimensional data in various forms. Supported are: Sequence, Series,
        pyarrow Array, and numpy ndarray.
    dtype : DataType, default None
        Data type of the resulting Series. If set to `None` (default), the data type
        is inferred from the `values` input. The strategy for data type inference
        depends on the `strict` parameter:

        - If `strict` is set to True (default), the inferred data type is equal to
          the first non-null value, or `Null` if all values are null.
        - If `strict` is set to False, the inferred data type is the supertype of
          the values, or :class:`Object` if no supertype can be found. **WARNING**:
          A full pass over the values is required to determine the supertype.
        - If no values were passed, the resulting data type is :class:`Null`.
    strict : bool, default True
        Throw an error if any value does not exactly match the given or inferred data
        type. If set to `False`, values that do not match the data type are cast to
        that data type or, if casting is not possible, set to null instead.
    nan_to_null : bool, default False
        In case a numpy array is used to create this Series, indicate how to deal
        with np.nan values. (This parameter is a no-op on non-numpy data).

    Examples
    --------
    Constructing a Series by specifying name and values positionally:

    >>> s = pl.Series("a", [1, 2, 3])
    >>> s
    shape: (3,)
    Series: 'a' [i64]
    [
        1
        2
        3
    ]

    Notice that the dtype is automatically inferred as a polars Int64:

    >>> s.dtype
    Int64

    Constructing a Series with a specific dtype:

    >>> s2 = pl.Series("a", [1, 2, 3], dtype=pl.Float32)
    >>> s2
    shape: (3,)
    Series: 'a' [f32]
    [
        1.0
        2.0
        3.0
    ]

    It is possible to construct a Series with values as the first positional
    argument. This syntax is considered an anti-pattern, but it can be useful in
    certain scenarios. You must specify any other arguments through keywords.

    >>> s3 = pl.Series([1, 2, 3])
    >>> s3
    shape: (3,)
    Series: '' [i64]
    [
        1
        2
        3
    ]
    """

    # NOTE: This `= None` is needed to generate the docs with sphinx_accessor.
    _s: PySeries = None  # type: ignore[assignment]
    _accessors: ClassVar[set[str]] = {
        "arr",
        "bin",
        "cat",
        "dt",
        "list",
        "plot",
        "str",
        "struct",
    }

    def __init__(
        self,
        name: str | ArrayLike | None = None,
        values: ArrayLike | None = None,
        dtype: PolarsDataType | None = None,
        *,
        strict: bool = True,
        nan_to_null: bool = False,
    ) -> None:
        # If 'Unknown' treat as None to trigger type inference
        if dtype == Unknown:
            dtype = None
        elif dtype is not None and not is_polars_dtype(dtype):
            dtype = parse_into_dtype(dtype)

        # Handle case where values are passed as the first argument
        original_name: str | None = None
        if name is None:
            name = ""
        elif isinstance(name, str):
            original_name = name
        else:
            if values is None:
                values = name
                name = ""
            else:
                msg = "Series name must be a string"
                raise TypeError(msg)

        if isinstance(values, Sequence):
            self._s = sequence_to_pyseries(
                name,
                values,
                dtype=dtype,
                strict=strict,
                nan_to_null=nan_to_null,
            )
        elif values is None:
            self._s = sequence_to_pyseries(name, [], dtype=dtype)
        elif _check_for_numpy(values) and isinstance(values, np.ndarray):
            self._s = numpy_to_pyseries(
                name, values, strict=strict, nan_to_null=nan_to_null
            )
            if values.dtype.type in [np.datetime64, np.timedelta64]:
                # cast to appropriate dtype, handling NaT values
                input_dtype = _resolve_temporal_dtype(None, values.dtype)
                dtype = _resolve_temporal_dtype(dtype, values.dtype)
                if dtype is not None:
                    self._s = (
                        # `values.dtype` has already been validated in
                        # `numpy_to_pyseries`, so `input_dtype` can't be `None`
                        self.cast(input_dtype, strict=False)  # type: ignore[arg-type]
                        .cast(dtype)
                        .scatter(np.argwhere(np.isnat(values)).flatten(), None)
                        ._s
                    )
                    return

            if dtype is not None:
                self._s = self.cast(dtype, strict=strict)._s
        elif _check_for_torch(values) and isinstance(values, torch.Tensor):
            self._s = numpy_to_pyseries(
                name, values.numpy(force=False), strict=strict, nan_to_null=nan_to_null
            )
            if dtype is not None:
                self._s = self.cast(dtype, strict=strict)._s
        elif _check_for_pyarrow(values) and isinstance(
            values, (pa.Array, pa.ChunkedArray)
        ):
            self._s = arrow_to_pyseries(name, values, dtype=dtype, strict=strict)
        elif _check_for_pandas(values) and isinstance(
            values, (pd.Series, pd.Index, pd.DatetimeIndex)
        ):
            self._s = pandas_to_pyseries(name, values, dtype=dtype, strict=strict)
        elif not hasattr(values, "__arrow_c_stream__") and _is_generator(values):
            self._s = iterable_to_pyseries(name, values, dtype=dtype, strict=strict)
        elif isinstance(values, Series):
            self._s = series_to_pyseries(
                original_name, values, dtype=dtype, strict=strict
            )
        elif isinstance(values, pl.DataFrame):
            self._s = dataframe_to_pyseries(
                original_name, values, dtype=dtype, strict=strict
            )
        elif hasattr(values, "__arrow_c_array__"):
            self._s = PySeries.from_arrow_c_array(values)
        elif hasattr(values, "__arrow_c_stream__"):
            self._s = PySeries.from_arrow_c_stream(values)
        else:
            msg = (
                f"Series constructor called with unsupported type {type(values).__name__!r}"
                " for the `values` parameter"
            )
            raise TypeError(msg)

    @classmethod
    def _from_pyseries(cls, pyseries: PySeries) -> Self:
        series = cls.__new__(cls)
        series._s = pyseries
        return series
If " "you are using an extension, please compile it with the latest 'pyo3-polars'" ) def _import_from_c(cls, name: str, pointers: list[tuple[int, int]]) -> Self: # `_import_from_c` was deprecated in 1.3 return cls._from_pyseries(PySeries._import_arrow_from_c(name, pointers)) @classmethod def _import_arrow_from_c(cls, name: str, pointers: list[tuple[int, int]]) -> Self: """ Construct a Series from Arrows C interface. Parameters ---------- name The name that should be given to the `Series`. pointers A list with tuples containing two entries: - The raw pointer to a C ArrowArray struct - The raw pointer to a C ArrowSchema struct Warning ------- This will read the `array` pointer without moving it. The host process should garbage collect the heap pointer, but not its contents. """ return cls._from_pyseries(PySeries._import_arrow_from_c(name, pointers)) @classmethod def _import(cls, pointer: int) -> Self: return cls._from_pyseries(PySeries._import(pointer)) def _export_arrow_to_c(self, out_ptr: int, out_schema_ptr: int) -> None: """ Export to a C ArrowArray and C ArrowSchema struct, given their pointers. Parameters ---------- out_ptr: int The raw pointer to a C ArrowArray struct. out_schema_ptr: int (optional) The raw pointer to a C ArrowSchema struct. Notes ----- The series should only contain a single chunk. If you want to export all chunks, first call `Series.get_chunks` to give you a list of chunks. Warning ------- Safety This function will write to the pointers given in `out_ptr` and `out_schema_ptr` and thus is highly unsafe. Leaking If you don't pass the ArrowArray struct to a consumer, array memory will leak. This is a low-level function intended for expert users. """ self._s._export_arrow_to_c(out_ptr, out_schema_ptr) def _get_buffer_info(self) -> BufferInfo: """ Return pointer, offset, and length information about the underlying buffer. Returns ------- tuple of ints Tuple of the form (pointer, offset, length) Raises ------ TypeError If the `Series` data type is not physical. ComputeError If the `Series` contains multiple chunks. Notes ----- This method is mainly intended for use with the dataframe interchange protocol. """ return self._s._get_buffer_info() def _get_buffers(self) -> SeriesBuffers: """ Return the underlying values, validity, and offsets buffers as Series. The values buffer always exists. The validity buffer may not exist if the column contains no null values. The offsets buffer only exists for Series of data type `String` and `List`. Returns ------- dict Dictionary with `"values"`, `"validity"`, and `"offsets"` keys mapping to the corresponding buffer or `None` if the buffer doesn't exist. Warnings -------- The underlying buffers for `String` Series cannot be represented in this format. Instead, the buffers are converted to a values and offsets buffer. Notes ----- This method is mainly intended for use with the dataframe interchange protocol. """ buffers = self._s._get_buffers() keys = ("values", "validity", "offsets") return { # type: ignore[return-value] k: self._from_pyseries(b) if b is not None else b for k, b in zip(keys, buffers) } @classmethod def _from_buffer( cls, dtype: PolarsDataType, buffer_info: BufferInfo, owner: Any ) -> Self: """ Construct a Series from information about its underlying buffer. Parameters ---------- dtype The data type of the buffer. Must be a physical type (integer, float, or boolean). buffer_info Tuple containing buffer information in the form `(pointer, offset, length)`. owner The object owning the buffer. 

    @classmethod
    def _from_buffer(
        cls, dtype: PolarsDataType, buffer_info: BufferInfo, owner: Any
    ) -> Self:
        """
        Construct a Series from information about its underlying buffer.

        Parameters
        ----------
        dtype
            The data type of the buffer.
            Must be a physical type (integer, float, or boolean).
        buffer_info
            Tuple containing buffer information in the form
            `(pointer, offset, length)`.
        owner
            The object owning the buffer.

        Returns
        -------
        Series

        Raises
        ------
        TypeError
            When the given `dtype` is not supported.

        Notes
        -----
        This method is mainly intended for use with the dataframe interchange
        protocol.
        """
        return cls._from_pyseries(PySeries._from_buffer(dtype, buffer_info, owner))

    @classmethod
    def _from_buffers(
        cls,
        dtype: PolarsDataType,
        data: Series | Sequence[Series],
        validity: Series | None = None,
    ) -> Self:
        """
        Construct a Series from information about its underlying buffers.

        Parameters
        ----------
        dtype
            The data type of the resulting Series.
        data
            Buffers describing the data. For most data types, this is a single
            Series of the physical data type of `dtype`. Some data types require
            multiple buffers:

            - `String`: A data buffer of type `UInt8` and an offsets buffer of
              type `Int64`. Note that this does not match how the data is
              represented internally and a data copy is required to construct
              the Series.
        validity
            Validity buffer. If specified, must be a Series of data type `Boolean`.

        Returns
        -------
        Series

        Raises
        ------
        TypeError
            When the given `dtype` is not supported or the other inputs do not
            match the requirements for constructing a Series of the given `dtype`.

        Warnings
        --------
        Constructing a `String` Series requires specifying a values and offsets
        buffer, which does not match the actual underlying buffers. The values and
        offsets buffer are converted into the actual buffers, which copies data.

        Notes
        -----
        This method is mainly intended for use with the dataframe interchange
        protocol.
        """
        if isinstance(data, Series):
            data_lst = [data._s]
        else:
            data_lst = [s._s for s in data]
        validity_series: plr.PySeries | None = None
        if validity is not None:
            validity_series = validity._s
        return cls._from_pyseries(
            PySeries._from_buffers(dtype, data_lst, validity_series)
        )
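
    # Illustrative sketch (not part of the class API): reassembling a Series from
    # buffers with `_from_buffers`, mirroring the docstring above. The concrete
    # values below are assumptions for illustration only.
    #
    #   >>> values = pl.Series([1, 2, 3], dtype=pl.Int64)
    #   >>> validity = pl.Series([True, False, True])
    #   >>> s = pl.Series._from_buffers(pl.Int64, data=values, validity=validity)
    #   >>> s.to_list()  # [1, None, 3]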

    @staticmethod
    def _newest_compat_level() -> int:
        """
        Get the newest supported compat level.

        This is for pyo3-polars.
        """
        return CompatLevel._newest()._version

    @property
    def dtype(self) -> DataType:
        """
        Get the data type of this Series.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.dtype
        Int64
        """
        return self._s.dtype()

    @property
    def flags(self) -> dict[str, bool]:
        """
        Get flags that are set on the Series.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.flags
        {'SORTED_ASC': False, 'SORTED_DESC': False}
        """
        out = {
            "SORTED_ASC": self._s.is_sorted_ascending_flag(),
            "SORTED_DESC": self._s.is_sorted_descending_flag(),
        }
        if self.dtype == List:
            out["FAST_EXPLODE"] = self._s.can_fast_explode_flag()
        return out

    @property
    def name(self) -> str:
        """
        Get the name of this Series.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.name
        'a'
        """
        return self._s.name()

    @property
    def shape(self) -> tuple[int]:
        """
        Shape of this Series.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.shape
        (3,)
        """
        return (self._s.len(),)

    def __bool__(self) -> NoReturn:
        msg = (
            "the truth value of a Series is ambiguous"
            "\n\n"
            "Here are some things you might want to try:\n"
            "- instead of `if s`, use `if not s.is_empty()`\n"
            "- instead of `s1 and s2`, use `s1 & s2`\n"
            "- instead of `s1 or s2`, use `s1 | s2`\n"
            "- instead of `s in [y, z]`, use `s.is_in([y, z])`\n"
        )
        raise TypeError(msg)

    def __getstate__(self) -> bytes:
        return self._s.__getstate__()

    def __setstate__(self, state: bytes) -> None:
        self._s = Series()._s  # Initialize with a dummy
        self._s.__setstate__(state)

    def __str__(self) -> str:
        s_repr: str = self._s.as_str()
        return s_repr.replace("Series", f"{self.__class__.__name__}", 1)

    def __repr__(self) -> str:
        return self.__str__()

    def __len__(self) -> int:
        return self.len()

    @overload
    def __and__(self, other: Expr) -> Expr: ...

    @overload
    def __and__(self, other: Any) -> Series: ...

    def __and__(self, other: Any) -> Expr | Series:
        if isinstance(other, pl.Expr):
            return F.lit(self) & other
        if not isinstance(other, Series):
            other = Series([other])
        return self._from_pyseries(self._s.bitand(other._s))

    @overload
    def __rand__(self, other: Expr) -> Expr: ...

    @overload
    def __rand__(self, other: Any) -> Series: ...

    def __rand__(self, other: Any) -> Expr | Series:
        if isinstance(other, pl.Expr):
            return other & F.lit(self)
        if not isinstance(other, Series):
            other = Series([other])
        return other & self

    @overload
    def __or__(self, other: Expr) -> Expr: ...

    @overload
    def __or__(self, other: Any) -> Series: ...

    def __or__(self, other: Any) -> Expr | Series:
        if isinstance(other, pl.Expr):
            return F.lit(self) | other
        if not isinstance(other, Series):
            other = Series([other])
        return self._from_pyseries(self._s.bitor(other._s))

    @overload
    def __ror__(self, other: Expr) -> Expr: ...

    @overload
    def __ror__(self, other: Any) -> Series: ...

    def __ror__(self, other: Any) -> Expr | Series:
        if isinstance(other, pl.Expr):
            return other | F.lit(self)
        if not isinstance(other, Series):
            other = Series([other])
        return other | self

    @overload
    def __xor__(self, other: Expr) -> Expr: ...

    @overload
    def __xor__(self, other: Any) -> Series: ...

    def __xor__(self, other: Any) -> Expr | Series:
        if isinstance(other, pl.Expr):
            return F.lit(self) ^ other
        if not isinstance(other, Series):
            other = Series([other])
        return self._from_pyseries(self._s.bitxor(other._s))
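
    # Illustrative sketch (not part of the class API): the bitwise dunders above
    # broadcast non-Series operands by wrapping them in a one-element Series.
    #
    #   >>> pl.Series([True, False]) & True                       # [True, False]
    #   >>> pl.Series([True, False]) | pl.Series([False, True])   # [True, True]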

    @overload
    def __rxor__(self, other: Expr) -> Expr: ...

    @overload
    def __rxor__(self, other: Any) -> Series: ...

    def __rxor__(self, other: Any) -> Expr | Series:
        if isinstance(other, pl.Expr):
            return other ^ F.lit(self)
        if not isinstance(other, Series):
            other = Series([other])
        return other ^ self

    def _comp(self, other: Any, op: ComparisonOperator) -> Series:
        # special edge-case; boolean broadcast series (eq/neq) is its own result
        if self.dtype == Boolean and isinstance(other, bool) and op in ("eq", "neq"):
            if (other is True and op == "eq") or (other is False and op == "neq"):
                return self.clone()
            elif (other is False and op == "eq") or (other is True and op == "neq"):
                return ~self
        elif isinstance(other, float) and self.dtype.is_integer():
            # require upcast when comparing int series to float value
            self = self.cast(Float64)
            f = get_ffi_func(op + "_<>", Float64, self._s)
            assert f is not None
            return self._from_pyseries(f(other))
        elif isinstance(other, datetime):
            if self.dtype == Date:
                # require upcast when comparing date series to datetime
                self = self.cast(Datetime("us"))
                time_unit = "us"
            elif self.dtype == Datetime:
                # Use local time zone info
                time_zone = self.dtype.time_zone  # type: ignore[attr-defined]
                if str(other.tzinfo) != str(time_zone):
                    msg = f"datetime time zone {other.tzinfo!r} does not match Series timezone {time_zone!r}"
                    raise TypeError(msg)
                time_unit = self.dtype.time_unit  # type: ignore[attr-defined]
            else:
                msg = f"cannot compare datetime.datetime to Series of type {self.dtype}"
                raise ValueError(msg)
            ts = datetime_to_int(other, time_unit)  # type: ignore[arg-type]
            f = get_ffi_func(op + "_<>", Int64, self._s)
            assert f is not None
            return self._from_pyseries(f(ts))
        elif isinstance(other, time) and self.dtype == Time:
            d = time_to_int(other)
            f = get_ffi_func(op + "_<>", Int64, self._s)
            assert f is not None
            return self._from_pyseries(f(d))
        elif isinstance(other, timedelta) and self.dtype == Duration:
            time_unit = self.dtype.time_unit  # type: ignore[attr-defined]
            td = timedelta_to_int(other, time_unit)  # type: ignore[arg-type]
            f = get_ffi_func(op + "_<>", Int64, self._s)
            assert f is not None
            return self._from_pyseries(f(td))
        elif self.dtype in [Categorical, Enum] and not isinstance(other, Series):
            other = Series([other])
        elif isinstance(other, date) and self.dtype == Date:
            d = date_to_int(other)
            f = get_ffi_func(op + "_<>", Int32, self._s)
            assert f is not None
            return self._from_pyseries(f(d))

        if isinstance(other, Sequence) and not isinstance(other, str):
            if self.dtype in (List, Array):
                other = [other]
            other = Series("", other)
            if other.dtype == Null:
                other = other.cast(self.dtype)
        if isinstance(other, Series):
            return self._from_pyseries(getattr(self._s, op)(other._s))

        try:
            f = get_ffi_func(op + "_<>", self.dtype, self._s)
        except NotImplementedError:
            f = None

        if f is None:
            msg = f"Series of type {self.dtype} does not have {op} operator"
            raise NotImplementedError(msg)
        if other is not None:
            other = maybe_cast(other, self.dtype)
        return self._from_pyseries(f(other))
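
    # Illustrative sketch (not part of the class API): `_comp` above upcasts
    # before dispatching, so mixed-type comparisons behave as described there.
    #
    #   >>> pl.Series([1, 2, 3]) > 1.5        # int series upcast to Float64
    #   >>> pl.Series([date(2020, 1, 1)]) == datetime(2020, 1, 1)  # Date -> Datetime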

    @overload  # type: ignore[override]
    def __eq__(self, other: Expr) -> Expr: ...  # type: ignore[overload-overlap]

    @overload
    def __eq__(self, other: object) -> Series: ...

    def __eq__(self, other: object) -> Series | Expr:
        warn_null_comparison(other)
        if isinstance(other, pl.Expr):
            return F.lit(self).__eq__(other)
        return self._comp(other, "eq")

    @overload  # type: ignore[override]
    def __ne__(self, other: Expr) -> Expr: ...  # type: ignore[overload-overlap]

    @overload
    def __ne__(self, other: object) -> Series: ...

    def __ne__(self, other: object) -> Series | Expr:
        warn_null_comparison(other)
        if isinstance(other, pl.Expr):
            return F.lit(self).__ne__(other)
        return self._comp(other, "neq")

    @overload
    def __gt__(self, other: Expr) -> Expr: ...

    @overload
    def __gt__(self, other: Any) -> Series: ...

    def __gt__(self, other: Any) -> Series | Expr:
        warn_null_comparison(other)
        if isinstance(other, pl.Expr):
            return F.lit(self).__gt__(other)
        return self._comp(other, "gt")

    @overload
    def __lt__(self, other: Expr) -> Expr: ...

    @overload
    def __lt__(self, other: Any) -> Series: ...

    def __lt__(self, other: Any) -> Series | Expr:
        warn_null_comparison(other)
        if isinstance(other, pl.Expr):
            return F.lit(self).__lt__(other)
        return self._comp(other, "lt")

    @overload
    def __ge__(self, other: Expr) -> Expr: ...

    @overload
    def __ge__(self, other: Any) -> Series: ...

    def __ge__(self, other: Any) -> Series | Expr:
        warn_null_comparison(other)
        if isinstance(other, pl.Expr):
            return F.lit(self).__ge__(other)
        return self._comp(other, "gt_eq")

    @overload
    def __le__(self, other: Expr) -> Expr: ...

    @overload
    def __le__(self, other: Any) -> Series: ...

    def __le__(self, other: Any) -> Series | Expr:
        warn_null_comparison(other)
        if isinstance(other, pl.Expr):
            return F.lit(self).__le__(other)
        return self._comp(other, "lt_eq")

    @overload
    def le(self, other: Expr) -> Expr: ...

    @overload
    def le(self, other: Any) -> Series: ...

    def le(self, other: Any) -> Series | Expr:
        """Method equivalent of operator expression `series <= other`."""
        return self.__le__(other)

    @overload
    def lt(self, other: Expr) -> Expr: ...

    @overload
    def lt(self, other: Any) -> Series: ...

    def lt(self, other: Any) -> Series | Expr:
        """Method equivalent of operator expression `series < other`."""
        return self.__lt__(other)

    @overload
    def eq(self, other: Expr) -> Expr: ...

    @overload
    def eq(self, other: Any) -> Series: ...

    def eq(self, other: Any) -> Series | Expr:
        """Method equivalent of operator expression `series == other`."""
        return self.__eq__(other)

    @overload
    def eq_missing(self, other: Expr) -> Expr: ...

    @overload
    def eq_missing(self, other: Any) -> Series: ...

    def eq_missing(self, other: Any) -> Series | Expr:
        """
        Method equivalent of equality operator `series == other` where `None == None`.

        This differs from the standard `eq` where null values are propagated.

        Parameters
        ----------
        other
            A literal or expression value to compare with.

        See Also
        --------
        ne_missing
        eq

        Examples
        --------
        >>> s1 = pl.Series("a", [333, 200, None])
        >>> s2 = pl.Series("a", [100, 200, None])
        >>> s1.eq(s2)
        shape: (3,)
        Series: 'a' [bool]
        [
            false
            true
            null
        ]
        >>> s1.eq_missing(s2)
        shape: (3,)
        Series: 'a' [bool]
        [
            false
            true
            true
        ]
        """
        if isinstance(other, pl.Expr):
            return F.lit(self).eq_missing(other)
        return self.to_frame().select(F.col(self.name).eq_missing(other)).to_series()

    @overload
    def ne(self, other: Expr) -> Expr: ...

    @overload
    def ne(self, other: Any) -> Series: ...

    def ne(self, other: Any) -> Series | Expr:
        """Method equivalent of operator expression `series != other`."""
        return self.__ne__(other)

    @overload
    def ne_missing(self, other: Expr) -> Expr: ...

    @overload
    def ne_missing(self, other: Any) -> Series: ...

    def ne_missing(self, other: Any) -> Series | Expr:
        """
        Method equivalent of equality operator `series != other` where `None == None`.

        This differs from the standard `ne` where null values are propagated.

        Parameters
        ----------
        other
            A literal or expression value to compare with.

        See Also
        --------
        eq_missing
        ne

        Examples
        --------
        >>> s1 = pl.Series("a", [333, 200, None])
        >>> s2 = pl.Series("a", [100, 200, None])
        >>> s1.ne(s2)
        shape: (3,)
        Series: 'a' [bool]
        [
            true
            false
            null
        ]
        >>> s1.ne_missing(s2)
        shape: (3,)
        Series: 'a' [bool]
        [
            true
            false
            false
        ]
        """
        if isinstance(other, pl.Expr):
            return F.lit(self).ne_missing(other)
        return self.to_frame().select(F.col(self.name).ne_missing(other)).to_series()

    @overload
    def ge(self, other: Expr) -> Expr: ...

    @overload
    def ge(self, other: Any) -> Series: ...

    def ge(self, other: Any) -> Series | Expr:
        """Method equivalent of operator expression `series >= other`."""
        return self.__ge__(other)

    @overload
    def gt(self, other: Expr) -> Expr: ...

    @overload
    def gt(self, other: Any) -> Series: ...

    def gt(self, other: Any) -> Series | Expr:
        """Method equivalent of operator expression `series > other`."""
        return self.__gt__(other)

    def _arithmetic(self, other: Any, op_s: str, op_ffi: str) -> Self:
        if isinstance(other, pl.Expr):
            # expand pl.lit, pl.datetime, pl.duration Exprs to compatible Series
            other = self.to_frame().select_seq(other).to_series()
        elif other is None:
            other = pl.Series("", [None])

        if isinstance(other, Series):
            return self._from_pyseries(getattr(self._s, op_s)(other._s))
        elif _check_for_numpy(other) and isinstance(other, np.ndarray):
            return self._from_pyseries(getattr(self._s, op_s)(Series(other)._s))
        elif (
            isinstance(other, (float, date, datetime, timedelta, str))
            and not self.dtype.is_float()
        ):
            _s = sequence_to_pyseries(self.name, [other])
            if "rhs" in op_ffi:
                return self._from_pyseries(getattr(_s, op_s)(self._s))
            else:
                return self._from_pyseries(getattr(self._s, op_s)(_s))

        if self.dtype.is_decimal() and isinstance(other, (PyDecimal, int)):
            if isinstance(other, int):
                pyseries = sequence_to_pyseries(self.name, [other])
                _s = self._from_pyseries(pyseries).cast(Decimal(scale=0))._s
            else:
                _s = sequence_to_pyseries(self.name, [other], dtype=Decimal)
            if "rhs" in op_ffi:
                return self._from_pyseries(getattr(_s, op_s)(self._s))
            else:
                return self._from_pyseries(getattr(self._s, op_s)(_s))
        else:
            other = maybe_cast(other, self.dtype)
            f = get_ffi_func(op_ffi, self.dtype, self._s)
        if f is None:
            msg = (
                f"cannot do arithmetic with Series of dtype: {self.dtype!r} and argument"
                f" of type: {type(other).__name__!r}"
            )
            raise TypeError(msg)
        return self._from_pyseries(f(other))

    @overload
    def __add__(self, other: DataFrame) -> DataFrame: ...

    @overload
    def __add__(self, other: Expr) -> Expr: ...

    @overload
    def __add__(self, other: Any) -> Self: ...

    def __add__(self, other: Any) -> Series | DataFrame | Expr:
        if isinstance(other, str):
            other = Series("", [other])
        elif isinstance(other, pl.DataFrame):
            return other + self
        elif isinstance(other, pl.Expr):
            return F.lit(self) + other
        if self.dtype.is_decimal() and isinstance(other, (float, int)):
            return self.to_frame().select(F.col(self.name) + other).to_series()
        return self._arithmetic(other, "add", "add_<>")

    @overload
    def __sub__(self, other: Expr) -> Expr: ...

    @overload
    def __sub__(self, other: Any) -> Self: ...

    def __sub__(self, other: Any) -> Series | Expr:
        if isinstance(other, pl.Expr):
            return F.lit(self) - other
        if self.dtype.is_decimal() and isinstance(other, (float, int)):
            return self.to_frame().select(F.col(self.name) - other).to_series()
        return self._arithmetic(other, "sub", "sub_<>")

    def _recursive_cast_to_dtype(self, leaf_dtype: PolarsDataType) -> Series:
        """
        Convert the leaf dtype to the given primitive datatype.

        This is equivalent to logic in DataType::cast_leaf() in Rust.
""" def convert_to_primitive(dtype: PolarsDataType) -> PolarsDataType: if isinstance(dtype, Array): return Array(convert_to_primitive(dtype.inner), shape=dtype.shape) if isinstance(dtype, List): return List(convert_to_primitive(dtype.inner)) return leaf_dtype return self.cast(convert_to_primitive(self.dtype)) @overload def __truediv__(self, other: Expr) -> Expr: ... @overload def __truediv__(self, other: Any) -> Series: ... def __truediv__(self, other: Any) -> Series | Expr: if isinstance(other, pl.Expr): return F.lit(self) / other if self.dtype.is_temporal() and not isinstance(self.dtype, Duration): msg = "first cast to integer before dividing datelike dtypes" raise TypeError(msg) if isinstance(other, (int, float)) and ( self.dtype.is_decimal() or isinstance(self.dtype, Duration) ): return self.to_frame().select(F.col(self.name) / other).to_series() self = ( self if ( self.dtype.is_float() or self.dtype.is_decimal() or isinstance(self.dtype, (List, Array, Duration)) or ( isinstance(other, Series) and isinstance(other.dtype, (List, Array)) ) ) else self._recursive_cast_to_dtype(Float64()) ) return self._arithmetic(other, "div", "div_<>") @overload def __floordiv__(self, other: Expr) -> Expr: ... @overload def __floordiv__(self, other: Any) -> Series: ... def __floordiv__(self, other: Any) -> Series | Expr: if isinstance(other, pl.Expr): return F.lit(self) // other if self.dtype.is_temporal(): msg = "first cast to integer before dividing datelike dtypes" raise TypeError(msg) if self.dtype.is_decimal() and isinstance(other, (float, int)): return self.to_frame().select(F.col(self.name) // other).to_series() if not isinstance(other, pl.Expr): other = F.lit(other) return self.to_frame().select_seq(F.col(self.name) // other).to_series() def __invert__(self) -> Series: return self.not_() @overload def __mul__(self, other: Expr) -> Expr: ... @overload def __mul__(self, other: DataFrame) -> DataFrame: ... @overload def __mul__(self, other: Any) -> Series: ... def __mul__(self, other: Any) -> Series | DataFrame | Expr: if isinstance(other, pl.Expr): return F.lit(self) * other if self.dtype.is_temporal() and not isinstance(self.dtype, Duration): msg = "first cast to integer before multiplying datelike dtypes" raise TypeError(msg) if isinstance(other, (int, float)) and ( self.dtype.is_decimal() or isinstance(self.dtype, Duration) ): return self.to_frame().select(F.col(self.name) * other).to_series() elif isinstance(other, pl.DataFrame): return other * self else: return self._arithmetic(other, "mul", "mul_<>") @overload def __mod__(self, other: Expr) -> Expr: ... @overload def __mod__(self, other: Any) -> Series: ... 

    @overload
    def __mod__(self, other: Expr) -> Expr: ...

    @overload
    def __mod__(self, other: Any) -> Series: ...

    def __mod__(self, other: Any) -> Series | Expr:
        if isinstance(other, pl.Expr):
            return F.lit(self).__mod__(other)
        if self.dtype.is_temporal():
            msg = "first cast to integer before applying modulo on datelike dtypes"
            raise TypeError(msg)
        if self.dtype.is_decimal() and isinstance(other, (float, int)):
            return self.to_frame().select(F.col(self.name) % other).to_series()
        return self._arithmetic(other, "rem", "rem_<>")

    def __rmod__(self, other: Any) -> Series:
        if self.dtype.is_temporal():
            msg = "first cast to integer before applying modulo on datelike dtypes"
            raise TypeError(msg)
        return self._arithmetic(other, "rem", "rem_<>_rhs")

    def __radd__(self, other: Any) -> Series:
        if isinstance(other, str) or (
            isinstance(other, (int, float)) and self.dtype.is_decimal()
        ):
            return self.to_frame().select(other + F.col(self.name)).to_series()
        return self._arithmetic(other, "add", "add_<>_rhs")

    def __rsub__(self, other: Any) -> Series:
        if isinstance(other, (int, float)) and self.dtype.is_decimal():
            return self.to_frame().select(other - F.col(self.name)).to_series()
        return self._arithmetic(other, "sub", "sub_<>_rhs")

    def __rtruediv__(self, other: Any) -> Series:
        if self.dtype.is_temporal():
            msg = "first cast to integer before dividing datelike dtypes"
            raise TypeError(msg)
        if self.dtype.is_float():
            return self.__rfloordiv__(other)

        if isinstance(other, (int, float)) and self.dtype.is_decimal():
            return self.to_frame().select(other / F.col(self.name)).to_series()

        if isinstance(other, int):
            other = float(other)
        return self.cast(Float64).__rfloordiv__(other)

    def __rfloordiv__(self, other: Any) -> Series:
        if self.dtype.is_temporal():
            msg = "first cast to integer before dividing datelike dtypes"
            raise TypeError(msg)
        return self._arithmetic(other, "div", "div_<>_rhs")

    def __rmul__(self, other: Any) -> Series:
        if self.dtype.is_temporal() and not isinstance(self.dtype, Duration):
            msg = "first cast to integer before multiplying datelike dtypes"
            raise TypeError(msg)
        if isinstance(other, (int, float)) and (
            self.dtype.is_decimal() or isinstance(self.dtype, Duration)
        ):
            return self.to_frame().select(other * F.col(self.name)).to_series()
        return self._arithmetic(other, "mul", "mul_<>")

    def __pow__(self, exponent: int | float | Series) -> Series:
        return self.pow(exponent)

    def __rpow__(self, other: Any) -> Series:
        return (
            self.to_frame()
            .select_seq((other ** F.col(self.name)).alias(self.name))
            .to_series()
        )

    def __matmul__(self, other: Any) -> float | Series | None:
        if isinstance(other, Sequence) or (
            _check_for_numpy(other) and isinstance(other, np.ndarray)
        ):
            other = Series(other)
        # elif isinstance(other, pl.DataFrame):
        #     return other.__rmatmul__(self)  # type: ignore[return-value]
        return self.dot(other)

    def __rmatmul__(self, other: Any) -> float | Series | None:
        if isinstance(other, Sequence) or (
            _check_for_numpy(other) and isinstance(other, np.ndarray)
        ):
            other = Series(other)
        return other.dot(self)

    def __neg__(self) -> Series:
        return self.to_frame().select_seq(-F.col(self.name)).to_series()

    def __pos__(self) -> Series:
        return self

    def __abs__(self) -> Series:
        return self.abs()

    def __copy__(self) -> Self:
        return self.clone()

    def __deepcopy__(self, memo: None = None) -> Self:
        return self.clone()

    def __contains__(self, item: Any) -> bool:
        if item is None:
            return self.has_nulls()
        return self.implode().list.contains(item).item()
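
    # Illustrative sketch (not part of the class API): membership checks route
    # through `__contains__` above, so `None` tests for nulls rather than equality.
    #
    #   >>> 2 in pl.Series([1, 2, 3])      # True
    #   >>> None in pl.Series([1, None])   # True (checks has_nulls)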

    def __iter__(self) -> Generator[Any]:
        if self.dtype in (List, Array):
            # TODO: either make a change and return py-native list data here, or find
            # a faster way to return nested/List series; sequential 'get_index' calls
            # make this path a lot slower (~10x) than it needs to be.
            get_index = self._s.get_index
            for idx in range(self.len()):
                yield get_index(idx)
        else:
            buffer_size = 25_000
            for offset in range(0, self.len(), buffer_size):
                yield from self.slice(offset, buffer_size).to_list()

    @overload
    def __getitem__(self, key: SingleIndexSelector) -> Any: ...

    @overload
    def __getitem__(self, key: MultiIndexSelector) -> Series: ...

    def __getitem__(
        self, key: SingleIndexSelector | MultiIndexSelector
    ) -> Any | Series:
        """
        Get part of the Series as a new Series or scalar.

        Parameters
        ----------
        key
            Row(s) to select.

        Returns
        -------
        Series or scalar, depending on `key`.

        Examples
        --------
        >>> s = pl.Series("a", [1, 4, 2])
        >>> s[0]
        1
        >>> s[0:2]
        shape: (2,)
        Series: 'a' [i64]
        [
            1
            4
        ]
        """
        return get_series_item_by_key(self, key)

    def __setitem__(
        self,
        key: int | Series | np.ndarray[Any, Any] | Sequence[object] | tuple[object],
        value: Any,
    ) -> None:
        # do the single idx as first branch as those are likely in a tight loop
        if isinstance(key, int) and not isinstance(key, bool):
            self.scatter(key, value)
            return None
        elif isinstance(value, Sequence) and not isinstance(value, str):
            if self.dtype.is_numeric() or self.dtype.is_temporal():
                self.scatter(key, value)  # type: ignore[arg-type]
                return None
            msg = (
                f"cannot set Series of dtype: {self.dtype!r} with list/tuple as value;"
                " use a scalar value"
            )
            raise TypeError(msg)
        if isinstance(key, Series):
            if key.dtype == Boolean:
                self._s = self.set(key, value)._s
            elif key.dtype == UInt64:
                self._s = self.scatter(key.cast(UInt32), value)._s
            elif key.dtype == UInt32:
                self._s = self.scatter(key, value)._s
        # TODO: implement for these types without casting to series
        elif _check_for_numpy(key) and isinstance(key, np.ndarray):
            if key.dtype == np.bool_:
                # boolean numpy mask
                self._s = self.scatter(np.argwhere(key)[:, 0], value)._s
            else:
                s = self._from_pyseries(
                    PySeries.new_u32("", np.array(key, np.uint32), _strict=True)
                )
                self.__setitem__(s, value)
        elif isinstance(key, (list, tuple)):
            s = self._from_pyseries(sequence_to_pyseries("", key, dtype=UInt32))
            self.__setitem__(s, value)
        else:
            msg = f'cannot use "{key!r}" for indexing'
            raise TypeError(msg)
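
    # Illustrative sketch (not part of the class API): `__setitem__` above accepts
    # integer positions, boolean masks, and index collections.
    #
    #   >>> s = pl.Series("a", [1, 2, 3])
    #   >>> s[0] = 10                               # single position -> scatter
    #   >>> s[np.array([False, True, True])] = 0    # boolean numpy mask
    #   >>> s[[1, 2]] = 5                           # index list, cast to UInt32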

    def __array__(
        self, dtype: npt.DTypeLike | None = None, copy: bool | None = None
    ) -> np.ndarray[Any, Any]:
        """
        Return a NumPy ndarray with the given data type.

        This method ensures a Polars Series can be treated as a NumPy ndarray.
        It enables `np.asarray` and NumPy universal functions.

        See the NumPy documentation for more information:
        https://numpy.org/doc/stable/user/basics.interoperability.html#the-array-method

        See Also
        --------
        __array_ufunc__
        """
        # Cast String types to fixed-length string to support string ufuncs
        # TODO: Use variable-length strings instead when NumPy 2.0.0 comes out:
        # https://numpy.org/devdocs/reference/routines.dtypes.html#numpy.dtypes.StringDType
        if dtype is None and not self.has_nulls() and self.dtype == String:
            dtype = np.dtype("U")

        if copy is None:
            writable, allow_copy = False, True
        elif copy is True:
            writable, allow_copy = True, True
        elif copy is False:
            writable, allow_copy = False, False
        else:
            msg = f"invalid input for `copy`: {copy!r}"
            raise TypeError(msg)

        arr = self.to_numpy(writable=writable, allow_copy=allow_copy)

        if dtype is not None and dtype != arr.dtype:
            if copy is False:
                # TODO: Only raise when data must be copied
                msg = f"copy not allowed: cast from {arr.dtype} to {dtype} prohibited"
                raise RuntimeError(msg)
            arr = arr.__array__(dtype)

        return arr

    def __array_ufunc__(
        self, ufunc: np.ufunc, method: str, *inputs: Any, **kwargs: Any
    ) -> Series:
        """Numpy universal functions."""
        if self._s.n_chunks() > 1:
            self._s.rechunk(in_place=True)

        s = self._s

        if method == "__call__":
            if ufunc.nout != 1:
                msg = "only ufuncs that return one 1D array are supported"
                raise NotImplementedError(msg)

            args: list[int | float | np.ndarray[Any, Any]] = []

            for arg in inputs:
                if isinstance(arg, (int, float, np.ndarray)):
                    args.append(arg)
                elif isinstance(arg, Series):
                    phys_arg = arg.to_physical()
                    if phys_arg._s.n_chunks() > 1:
                        phys_arg._s.rechunk(in_place=True)
                    args.append(phys_arg._s.to_numpy_view())  # type: ignore[arg-type]
                else:
                    msg = f"unsupported type {qualified_type_name(arg)!r} for {arg!r}"
                    raise TypeError(msg)

            # Get minimum dtype needed to be able to cast all input arguments to the
            # same dtype.
            dtype_char_minimum: str = np.result_type(*args).char

            # Get all possible output dtypes for ufunc.
            # Input dtypes and output dtypes seem to always match for ufunc.types,
            # so pick all the different output dtypes.
            dtypes_ufunc = [
                input_output_type[-1]
                for input_output_type in ufunc.types
                if supported_numpy_char_code(input_output_type[-1])
            ]

            # Get the first ufunc dtype from all possible ufunc dtypes for which
            # the input arguments can be safely cast to that ufunc dtype.
            for dtype_ufunc in dtypes_ufunc:
                if np.can_cast(dtype_char_minimum, dtype_ufunc):
                    dtype_char_minimum = dtype_ufunc
                    break

            # Override minimum dtype if requested.
            dtype_char = (
                np.dtype(kwargs.pop("dtype")).char
                if "dtype" in kwargs
                else dtype_char_minimum
            )

            # Only generalized ufuncs have a signature set:
            is_generalized_ufunc = bool(ufunc.signature)

            if is_generalized_ufunc:
                # Generalized ufuncs will operate on the whole array, so
                # missing data can corrupt the results.
                if self.has_nulls():
                    msg = "can't pass a Series with missing data to a generalized ufunc, as it might give unexpected results. See https://docs.pola.rs/user-guide/expressions/missing-data/ for suggestions on how to remove or fill in missing data."
                    raise ComputeError(msg)
                # If the input and output are the same size, e.g. "(n)->(n)" we
                # can allocate ourselves and save a copy. If they're different,
                # we let the ufunc do the allocation, since only it knows the
                # output size.
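                # For example, a signature of "(n)->()" reduces a vector to a
                # scalar (sizes differ, so the ufunc allocates), while "(n)->(n)"
                # maps a vector to one of equal length (we can pre-allocate).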
                assert ufunc.signature is not None  # pacify MyPy
                ufunc_input, ufunc_output = ufunc.signature.split("->")
                if ufunc_output == "()":
                    # If the result is a scalar, just let the function do its
                    # thing, no need for any song and dance involving
                    # allocation:
                    return ufunc(*args, dtype=dtype_char, **kwargs)
                else:
                    allocate_output = ufunc_input == ufunc_output
            else:
                allocate_output = True

            f = get_ffi_func("apply_ufunc_<>", numpy_char_code_to_dtype(dtype_char), s)

            if f is None:
                msg = (
                    "could not find "
                    f"`apply_ufunc_{numpy_char_code_to_dtype(dtype_char)}`"
                )
                raise NotImplementedError(msg)

            series = f(
                lambda out: ufunc(*args, out=out, dtype=dtype_char, **kwargs),
                allocate_output,
            )
            result = self._from_pyseries(series)
            if is_generalized_ufunc:
                # In this case we've disallowed passing in missing data, so no
                # further processing is needed.
                return result

            # We're using a regular ufunc, that operates value by value. That
            # means we allowed missing data in the input, so filter it out:
            validity_mask = self.is_not_null()
            for arg in inputs:
                if isinstance(arg, Series):
                    validity_mask &= arg.is_not_null()
            return (
                result.to_frame()
                .select(F.when(validity_mask).then(F.col(self.name)))
                .to_series(0)
            )
        else:
            msg = (
                "only `__call__` is implemented for numpy ufuncs on a Series, got "
                f"`{method!r}`"
            )
            raise NotImplementedError(msg)
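
    # Illustrative sketch (not part of the class API): thanks to
    # `__array_ufunc__` above, NumPy ufuncs applied to a Series return a Series,
    # with nulls masked back in for element-wise ufuncs.
    #
    #   >>> import numpy as np
    #   >>> np.log(pl.Series([1.0, None, np.e]))   # -> [0.0, null, 1.0]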

    def __arrow_c_stream__(self, requested_schema: object | None = None) -> object:
        """
        Export a Series via the Arrow PyCapsule Interface.

        https://arrow.apache.org/docs/dev/format/CDataInterface/PyCapsuleInterface.html
        """
        return self._s.__arrow_c_stream__(requested_schema)

    def _repr_html_(self) -> str:
        """Format output data in HTML for display in Jupyter Notebooks."""
        return self.to_frame()._repr_html_(_from_series=True)

    def item(self, index: int | None = None) -> Any:
        """
        Return the Series as a scalar, or return the element at the given index.

        If no index is provided, this is equivalent to `s[0]`, with a check
        that the shape is (1,). With an index, this is equivalent to `s[index]`.

        Examples
        --------
        >>> s1 = pl.Series("a", [1])
        >>> s1.item()
        1
        >>> s2 = pl.Series("a", [9, 8, 7])
        >>> s2.cum_sum().item(-1)
        24
        """
        if index is None:
            if len(self) != 1:
                msg = (
                    "can only call '.item()' if the Series is of length 1,"
                    f" or an explicit index is provided (Series is of length {len(self)})"
                )
                raise ValueError(msg)
            return self._s.get_index(0)
        return self._s.get_index_signed(index)

    def estimated_size(self, unit: SizeUnit = "b") -> int | float:
        """
        Return an estimation of the total (heap) allocated size of the Series.

        Estimated size is given in the specified unit (bytes by default).

        This estimation is the sum of the size of its buffers, validity, including
        nested arrays. Multiple arrays may share buffers and bitmaps. Therefore,
        the size of 2 arrays is not the sum of the sizes computed from this
        function. In particular, [`StructArray`]'s size is an upper bound.

        When an array is sliced, its allocated size remains constant because the
        buffer is unchanged. However, this function will yield a smaller number.
        This is because this function returns the visible size of the buffer, not
        its total capacity.

        FFI buffers are included in this estimation.

        Notes
        -----
        For data with Object dtype, the estimated size only reports the pointer
        size, which is a huge underestimation.

        Parameters
        ----------
        unit : {'b', 'kb', 'mb', 'gb', 'tb'}
            Scale the returned size to the given unit.

        Examples
        --------
        >>> s = pl.Series("values", list(range(1_000_000)), dtype=pl.UInt32)
        >>> s.estimated_size()
        4000000
        >>> s.estimated_size("mb")
        3.814697265625
        """
        sz = self._s.estimated_size()
        return scale_bytes(sz, unit)

    def sqrt(self) -> Series:
        """
        Compute the square root of the elements.

        Syntactic sugar for

        >>> pl.Series([1, 2]) ** 0.5
        shape: (2,)
        Series: '' [f64]
        [
            1.0
            1.414214
        ]

        Examples
        --------
        >>> s = pl.Series([1, 2, 3])
        >>> s.sqrt()
        shape: (3,)
        Series: '' [f64]
        [
            1.0
            1.414214
            1.732051
        ]
        """

    def cbrt(self) -> Series:
        """
        Compute the cube root of the elements.

        Optimization for

        >>> pl.Series([1, 2]) ** (1.0 / 3)
        shape: (2,)
        Series: '' [f64]
        [
            1.0
            1.259921
        ]

        Examples
        --------
        >>> s = pl.Series([1, 2, 3])
        >>> s.cbrt()
        shape: (3,)
        Series: '' [f64]
        [
            1.0
            1.259921
            1.44225
        ]
        """

    @overload
    def any(self, *, ignore_nulls: Literal[True] = ...) -> bool: ...

    @overload
    def any(self, *, ignore_nulls: bool) -> bool | None: ...

    def any(self, *, ignore_nulls: bool = True) -> bool | None:
        """
        Return whether any of the values in the column are `True`.

        Only works on columns of data type :class:`Boolean`.

        Parameters
        ----------
        ignore_nulls
            * If set to `True` (default), null values are ignored. If there
              are no non-null values, the output is `False`.
            * If set to `False`, `Kleene logic`_ is used to deal with nulls:
              if the column contains any null values and no `True` values,
              the output is `None`.

            .. _Kleene logic: https://en.wikipedia.org/wiki/Three-valued_logic

        Returns
        -------
        bool or None

        Examples
        --------
        >>> pl.Series([True, False]).any()
        True
        >>> pl.Series([False, False]).any()
        False
        >>> pl.Series([None, False]).any()
        False

        Enable Kleene logic by setting `ignore_nulls=False`.

        >>> pl.Series([None, False]).any(ignore_nulls=False)  # Returns None
        """
        return self._s.any(ignore_nulls=ignore_nulls)

    @overload
    def all(self, *, ignore_nulls: Literal[True] = ...) -> bool: ...

    @overload
    def all(self, *, ignore_nulls: bool) -> bool | None: ...

    def all(self, *, ignore_nulls: bool = True) -> bool | None:
        """
        Return whether all values in the column are `True`.

        Only works on columns of data type :class:`Boolean`.

        Parameters
        ----------
        ignore_nulls
            * If set to `True` (default), null values are ignored. If there
              are no non-null values, the output is `True`.
            * If set to `False`, `Kleene logic`_ is used to deal with nulls:
              if the column contains any null values and no `False` values,
              the output is `None`.

            .. _Kleene logic: https://en.wikipedia.org/wiki/Three-valued_logic

        Returns
        -------
        bool or None

        Examples
        --------
        >>> pl.Series([True, True]).all()
        True
        >>> pl.Series([False, True]).all()
        False
        >>> pl.Series([None, True]).all()
        True

        Enable Kleene logic by setting `ignore_nulls=False`.

        >>> pl.Series([None, True]).all(ignore_nulls=False)  # Returns None
        """
        return self._s.all(ignore_nulls=ignore_nulls)

    def log(self, base: float | Series = math.e) -> Series:
        """
        Compute the logarithm to a given base.

        Examples
        --------
        >>> s = pl.Series([1, 2, 3])
        >>> s.log()
        shape: (3,)
        Series: '' [f64]
        [
            0.0
            0.693147
            1.098612
        ]
        """

    def log1p(self) -> Series:
        """
        Compute the natural logarithm of the input array plus one, element-wise.

        Examples
        --------
        >>> s = pl.Series([1, 2, 3])
        >>> s.log1p()
        shape: (3,)
        Series: '' [f64]
        [
            0.693147
            1.098612
            1.386294
        ]
        """

    def log10(self) -> Series:
        """
        Compute the base 10 logarithm of the input array, element-wise.

        Examples
        --------
        >>> s = pl.Series([10, 100, 1000])
        >>> s.log10()
        shape: (3,)
        Series: '' [f64]
        [
            1.0
            2.0
            3.0
        ]
        """

    def exp(self) -> Series:
        """
        Compute the exponential, element-wise.

        Examples
        --------
        >>> s = pl.Series([1, 2, 3])
        >>> s.exp()
        shape: (3,)
        Series: '' [f64]
        [
            2.718282
            7.389056
            20.085537
        ]
        """

    def drop_nulls(self) -> Series:
        """
        Drop all null values.

        The original order of the remaining elements is preserved.

        See Also
        --------
        drop_nans

        Notes
        -----
        A null value is not the same as a NaN value.
        To drop NaN values, use :func:`drop_nans`.

        Examples
        --------
        >>> s = pl.Series([1.0, None, 3.0, float("nan")])
        >>> s.drop_nulls()
        shape: (3,)
        Series: '' [f64]
        [
            1.0
            3.0
            NaN
        ]
        """

    def drop_nans(self) -> Series:
        """
        Drop all floating point NaN values.

        The original order of the remaining elements is preserved.

        See Also
        --------
        drop_nulls

        Notes
        -----
        A NaN value is not the same as a null value.
        To drop null values, use :func:`drop_nulls`.

        Examples
        --------
        >>> s = pl.Series([1.0, None, 3.0, float("nan")])
        >>> s.drop_nans()
        shape: (3,)
        Series: '' [f64]
        [
            1.0
            null
            3.0
        ]
        """

    def to_frame(self, name: str | None = None) -> DataFrame:
        """
        Cast this Series to a DataFrame.

        Parameters
        ----------
        name
            optionally name/rename the Series column in the new DataFrame.

        Examples
        --------
        >>> s = pl.Series("a", [123, 456])
        >>> df = s.to_frame()
        >>> df
        shape: (2, 1)
        ┌─────┐
        │ a   │
        │ --- │
        │ i64 │
        ╞═════╡
        │ 123 │
        │ 456 │
        └─────┘

        >>> df = s.to_frame("xyz")
        >>> df
        shape: (2, 1)
        ┌─────┐
        │ xyz │
        │ --- │
        │ i64 │
        ╞═════╡
        │ 123 │
        │ 456 │
        └─────┘
        """
        if isinstance(name, str):
            return wrap_df(PyDataFrame([self.rename(name)._s]))
        return wrap_df(PyDataFrame([self._s]))

    def describe(
        self,
        percentiles: Sequence[float] | float | None = (0.25, 0.50, 0.75),
        interpolation: QuantileMethod = "nearest",
    ) -> DataFrame:
        """
        Quick summary statistics of a Series.

        Series with mixed datatypes will return summary statistics for the datatype
        of the first value.

        Parameters
        ----------
        percentiles
            One or more percentiles to include in the summary statistics (if the
            Series has a numeric dtype). All values must be in the range `[0, 1]`.
        interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable'}
            Interpolation method used when calculating percentiles.

        Notes
        -----
        The median is included by default as the 50% percentile.

        Returns
        -------
        DataFrame
            Mapping with summary statistics of a Series.

        Examples
        --------
        >>> s = pl.Series([1, 2, 3, 4, 5])
        >>> s.describe()
        shape: (9, 2)
        ┌────────────┬──────────┐
        │ statistic  ┆ value    │
        │ ---        ┆ ---      │
        │ str        ┆ f64      │
        ╞════════════╪══════════╡
        │ count      ┆ 5.0      │
        │ null_count ┆ 0.0      │
        │ mean       ┆ 3.0      │
        │ std        ┆ 1.581139 │
        │ min        ┆ 1.0      │
        │ 25%        ┆ 2.0      │
        │ 50%        ┆ 3.0      │
        │ 75%        ┆ 4.0      │
        │ max        ┆ 5.0      │
        └────────────┴──────────┘

        Non-numeric data types may not have all statistics available.

        >>> s = pl.Series(["aa", "aa", None, "bb", "cc"])
        >>> s.describe()
        shape: (4, 2)
        ┌────────────┬───────┐
        │ statistic  ┆ value │
        │ ---        ┆ ---   │
        │ str        ┆ str   │
        ╞════════════╪═══════╡
        │ count      ┆ 4     │
        │ null_count ┆ 1     │
        │ min        ┆ aa    │
        │ max        ┆ cc    │
        └────────────┴───────┘
        """  # noqa: W505
        stats = self.to_frame().describe(
            percentiles=percentiles,
            interpolation=interpolation,
        )
        stats.columns = ["statistic", "value"]
        return stats.filter(F.col("value").is_not_null())

    def sum(self) -> int | float:
        """
        Reduce this Series to the sum value.

        Notes
        -----
        * Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before summing
          to prevent overflow issues.
        * If there are no non-null values, then the output is `0`. If you would
          prefer empty sums to return `None`, you can use
          `s.sum() if s.count() else None` instead of `s.sum()`.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.sum()
        6
        """
        return self._s.sum()

    def mean(self) -> PythonLiteral | None:
        """
        Reduce this Series to the mean value.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.mean()
        2.0
        """
        return self._s.mean()

    def product(self) -> int | float:
        """
        Reduce this Series to the product value.

        Notes
        -----
        If there are no non-null values, then the output is `1`. If you would
        prefer empty products to return `None`, you can use
        `s.product() if s.count() else None` instead of `s.product()`.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.product()
        6
        """
        return self._s.product()

    def pow(self, exponent: int | float | Series) -> Series:
        """
        Raise to the power of the given exponent.

        If the exponent is float, the result follows the dtype of exponent.
        Otherwise, it follows dtype of base.

        Parameters
        ----------
        exponent
            The exponent. Accepts Series input.

        Examples
        --------
        Raising integers to positive integers results in integers:

        >>> s = pl.Series("foo", [1, 2, 3, 4])
        >>> s.pow(3)
        shape: (4,)
        Series: 'foo' [i64]
        [
            1
            8
            27
            64
        ]

        In order to raise integers to negative integers, you can cast either the
        base or the exponent to float:

        >>> s.pow(-3.0)
        shape: (4,)
        Series: 'foo' [f64]
        [
            1.0
            0.125
            0.037037
            0.015625
        ]
        """
        if _check_for_numpy(exponent) and isinstance(exponent, np.ndarray):
            exponent = Series(exponent)
        return self.to_frame().select_seq(F.col(self.name).pow(exponent)).to_series()

    def min(self) -> PythonLiteral | None:
        """
        Get the minimal value in this Series.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.min()
        1
        """
        return self._s.min()

    def max(self) -> PythonLiteral | None:
        """
        Get the maximum value in this Series.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.max()
        3
        """
        return self._s.max()

    def nan_max(self) -> int | float | date | datetime | timedelta | str:
        """
        Get maximum value, but propagate/poison encountered NaN values.

        This differs from numpy's `nanmax` as numpy defaults to propagating NaN
        values, whereas polars defaults to ignoring them.

        Examples
        --------
        >>> s = pl.Series("a", [1, 3, 4])
        >>> s.nan_max()
        4

        >>> s = pl.Series("a", [1.0, float("nan"), 4.0])
        >>> s.nan_max()
        nan
        """
        return self.to_frame().select_seq(F.col(self.name).nan_max()).item()

    def nan_min(self) -> int | float | date | datetime | timedelta | str:
        """
        Get minimum value, but propagate/poison encountered NaN values.

        This differs from numpy's `nanmin` as numpy defaults to propagating NaN
        values, whereas polars defaults to ignoring them.

        Examples
        --------
        >>> s = pl.Series("a", [1, 3, 4])
        >>> s.nan_min()
        1

        >>> s = pl.Series("a", [1.0, float("nan"), 4.0])
        >>> s.nan_min()
        nan
        """
        return self.to_frame().select_seq(F.col(self.name).nan_min()).item()

    def std(self, ddof: int = 1) -> float | timedelta | None:
        """
        Get the standard deviation of this Series.

        Parameters
        ----------
        ddof
            “Delta Degrees of Freedom”: the divisor used in the calculation is
            N - ddof, where N represents the number of elements.
            By default ddof is 1.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.std()
        1.0
        """
        return self._s.std(ddof)

    def var(self, ddof: int = 1) -> float | timedelta | None:
        """
        Get variance of this Series.

        Parameters
        ----------
        ddof
            “Delta Degrees of Freedom”: the divisor used in the calculation is
            N - ddof, where N represents the number of elements.
            By default ddof is 1.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.var()
        1.0
        """
        return self._s.var(ddof)

    def median(self) -> PythonLiteral | None:
        """
        Get the median of this Series.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.median()
        2.0
        """
        return self._s.median()

    def quantile(
        self, quantile: float, interpolation: QuantileMethod = "nearest"
    ) -> float | None:
        """
        Get the quantile value of this Series.

        Parameters
        ----------
        quantile
            Quantile between 0.0 and 1.0.
        interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable'}
            Interpolation method.

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.quantile(0.5)
        2.0
        """  # noqa: W505
        return self._s.quantile(quantile, interpolation)

    def to_dummies(
        self,
        *,
        separator: str = "_",
        drop_first: bool = False,
        drop_nulls: bool = False,
    ) -> DataFrame:
        """
        Get dummy/indicator variables.

        Parameters
        ----------
        separator
            Separator/delimiter used when generating column names.
        drop_first
            Remove the first category from the variable being encoded.
        drop_nulls
            If there are `None` values in the series, a `null` column is not
            generated

        Examples
        --------
        >>> s = pl.Series("a", [1, 2, 3])
        >>> s.to_dummies()
        shape: (3, 3)
        ┌─────┬─────┬─────┐
        │ a_1 ┆ a_2 ┆ a_3 │
        │ --- ┆ --- ┆ --- │
        │ u8  ┆ u8  ┆ u8  │
        ╞═════╪═════╪═════╡
        │ 1   ┆ 0   ┆ 0   │
        │ 0   ┆ 1   ┆ 0   │
        │ 0   ┆ 0   ┆ 1   │
        └─────┴─────┴─────┘

        >>> s.to_dummies(drop_first=True)
        shape: (3, 2)
        ┌─────┬─────┐
        │ a_2 ┆ a_3 │
        │ --- ┆ --- │
        │ u8  ┆ u8  │
        ╞═════╪═════╡
        │ 0   ┆ 0   │
        │ 1   ┆ 0   │
        │ 0   ┆ 1   │
        └─────┴─────┘
        """
        return wrap_df(self._s.to_dummies(separator, drop_first, drop_nulls))

    @unstable()
    def cut(
        self,
        breaks: Sequence[float],
        *,
        labels: Sequence[str] | None = None,
        left_closed: bool = False,
        include_breaks: bool = False,
    ) -> Series:
        """
        Bin continuous values into discrete categories.

        .. warning::
            This functionality is considered **unstable**. It may be changed
            at any point without it being considered a breaking change.

        Parameters
        ----------
        breaks
            List of unique cut points.
        labels
            Names of the categories. The number of labels must be equal to the
            number of cut points plus one.
        left_closed
            Set the intervals to be left-closed instead of right-closed.
        include_breaks
            Include a column with the right endpoint of the bin each observation
            falls in. This will change the data type of the output from a
            :class:`Categorical` to a :class:`Struct`.

        Returns
        -------
        Series
            Series of data type :class:`Categorical` if `include_breaks` is set to
            `False` (default), otherwise a Series of data type :class:`Struct`.

        See Also
        --------
        qcut

        Examples
        --------
        Divide the column into three categories.

        >>> s = pl.Series("foo", [-2, -1, 0, 1, 2])
        >>> s.cut([-1, 1], labels=["a", "b", "c"])
        shape: (5,)
        Series: 'foo' [cat]
        [
            "a"
            "a"
            "b"
            "b"
            "c"
        ]

        Create a DataFrame with the breakpoint and category for each value.

        >>> cut = s.cut([-1, 1], include_breaks=True).alias("cut")
        >>> s.to_frame().with_columns(cut).unnest("cut")
        shape: (5, 3)
        ┌─────┬────────────┬────────────┐
        │ foo ┆ breakpoint ┆ category   │
        │ --- ┆ ---        ┆ ---        │
        │ i64 ┆ f64        ┆ cat        │
        ╞═════╪════════════╪════════════╡
        │ -2  ┆ -1.0       ┆ (-inf, -1] │
        │ -1  ┆ -1.0       ┆ (-inf, -1] │
        │ 0   ┆ 1.0        ┆ (-1, 1]    │
        │ 1   ┆ 1.0        ┆ (-1, 1]    │
        │ 2   ┆ inf        ┆ (1, inf]   │
        └─────┴────────────┴────────────┘
        """

    @unstable()
    def qcut(
        self,
        quantiles: Sequence[float] | int,
        *,
        labels: Sequence[str] | None = None,
        left_closed: bool = False,
        allow_duplicates: bool = False,
        include_breaks: bool = False,
    ) -> Series:
        """
        Bin continuous values into discrete categories based on their quantiles.

        .. warning::
            This functionality is considered **unstable**. It may be changed
            at any point without it being considered a breaking change.
Parameters ---------- quantiles Either a list of quantile probabilities between 0 and 1 or a positive integer determining the number of bins with uniform probability. labels Names of the categories. The number of labels must be equal to the number of cut points plus one. left_closed Set the intervals to be left-closed instead of right-closed. allow_duplicates If set to `True`, duplicates in the resulting quantiles are dropped, rather than raising a `DuplicateError`. This can happen even with unique probabilities, depending on the data. include_breaks Include a column with the right endpoint of the bin each observation falls in. This will change the data type of the output from a :class:`Categorical` to a :class:`Struct`. Returns ------- Series Series of data type :class:`Categorical` if `include_breaks` is set to `False` (default), otherwise a Series of data type :class:`Struct`. See Also -------- cut Examples -------- Divide a column into three categories according to pre-defined quantile probabilities. >>> s = pl.Series("foo", [-2, -1, 0, 1, 2]) >>> s.qcut([0.25, 0.75], labels=["a", "b", "c"]) shape: (5,) Series: 'foo' [cat] [ "a" "a" "b" "b" "c" ] Divide a column into two categories using uniform quantile probabilities. >>> s.qcut(2, labels=["low", "high"], left_closed=True) shape: (5,) Series: 'foo' [cat] [ "low" "low" "high" "high" "high" ] Create a DataFrame with the breakpoint and category for each value. >>> cut = s.qcut([0.25, 0.75], include_breaks=True).alias("cut") >>> s.to_frame().with_columns(cut).unnest("cut") shape: (5, 3) ┌─────┬────────────┬────────────┐ │ foo ┆ breakpoint ┆ category │ │ --- ┆ --- ┆ --- │ │ i64 ┆ f64 ┆ cat │ ╞═════╪════════════╪════════════╡ │ -2 ┆ -1.0 ┆ (-inf, -1] │ │ -1 ┆ -1.0 ┆ (-inf, -1] │ │ 0 ┆ 1.0 ┆ (-1, 1] │ │ 1 ┆ 1.0 ┆ (-1, 1] │ │ 2 ┆ inf ┆ (1, inf] │ └─────┴────────────┴────────────┘ """ def rle(self) -> Series: """ Compress the Series data using run-length encoding. Run-length encoding (RLE) encodes data by storing each *run* of identical values as a single value and its length. Returns ------- Series Series of data type `Struct` with fields `len` of data type `UInt32` and `value` of the original data type. Examples -------- >>> s = pl.Series("s", [1, 1, 2, 1, None, 1, 3, 3]) >>> s.rle().struct.unnest() shape: (6, 2) ┌─────┬───────┐ │ len ┆ value │ │ --- ┆ --- │ │ u32 ┆ i64 │ ╞═════╪═══════╡ │ 2 ┆ 1 │ │ 1 ┆ 2 │ │ 1 ┆ 1 │ │ 1 ┆ null │ │ 1 ┆ 1 │ │ 2 ┆ 3 │ └─────┴───────┘ """ def rle_id(self) -> Series: """ Get a distinct integer ID for each run of identical values. The ID starts at 0 and increases by one each time the value of the column changes. Returns ------- Series Series of data type `UInt32`. See Also -------- rle Notes ----- This functionality is especially useful for defining a new group for every time a column's value changes, rather than for every distinct value of that column. Examples -------- >>> s = pl.Series("s", [1, 1, 2, 1, None, 1, 3, 3]) >>> s.rle_id() shape: (8,) Series: 's' [u32] [ 0 0 1 2 3 4 5 5 ] """ @unstable() def hist( self, bins: list[float] | None = None, *, bin_count: int | None = None, include_category: bool = True, include_breakpoint: bool = True, ) -> DataFrame: """ Bin values into buckets and count their occurrences. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. Parameters ---------- bins Bin edges. If None is given, the edges are determined from the data.
bin_count If `bins` is not provided, `bin_count` uniform bins are created that fully encompass the data. include_breakpoint Include a column that indicates the upper breakpoint. include_category Include a column that shows the intervals as categories. Returns ------- DataFrame Examples -------- >>> a = pl.Series("a", [1, 3, 8, 8, 2, 1, 3]) >>> a.hist(bin_count=4) shape: (4, 3) ┌────────────┬─────────────┬───────┐ │ breakpoint ┆ category ┆ count │ │ --- ┆ --- ┆ --- │ │ f64 ┆ cat ┆ u32 │ ╞════════════╪═════════════╪═══════╡ │ 2.75 ┆ [1.0, 2.75] ┆ 3 │ │ 4.5 ┆ (2.75, 4.5] ┆ 2 │ │ 6.25 ┆ (4.5, 6.25] ┆ 0 │ │ 8.0 ┆ (6.25, 8.0] ┆ 2 │ └────────────┴─────────────┴───────┘ """ out = ( self.to_frame() .select_seq( F.col(self.name).hist( bins=bins, bin_count=bin_count, include_category=include_category, include_breakpoint=include_breakpoint, ) ) .to_series() ) if not include_breakpoint and not include_category: return out.to_frame() else: return out.struct.unnest() def value_counts( self, *, sort: bool = False, parallel: bool = False, name: str | None = None, normalize: bool = False, ) -> DataFrame: """ Count the occurrences of unique values. Parameters ---------- sort Sort the output by count, in descending order. If set to `False` (default), the order is non-deterministic. parallel Execute the computation in parallel. .. note:: This option should likely *not* be enabled in a `group_by` context, as the computation will already be parallelized per group. name Give the resulting count column a specific name; if `normalize` is True this defaults to "proportion", otherwise defaults to "count". normalize If True, the count is returned as the relative frequency of unique values normalized to 1.0. Returns ------- DataFrame Columns map the unique values to their count (or proportion). Examples -------- >>> s = pl.Series("color", ["red", "blue", "red", "green", "blue", "blue"]) >>> s.value_counts()  # doctest: +IGNORE_RESULT shape: (3, 2) ┌───────┬───────┐ │ color ┆ count │ │ --- ┆ --- │ │ str ┆ u32 │ ╞═══════╪═══════╡ │ red ┆ 2 │ │ green ┆ 1 │ │ blue ┆ 3 │ └───────┴───────┘ Sort the output by count and customize the count column name. >>> s.value_counts(sort=True, name="n") shape: (3, 2) ┌───────┬─────┐ │ color ┆ n │ │ --- ┆ --- │ │ str ┆ u32 │ ╞═══════╪═════╡ │ blue ┆ 3 │ │ red ┆ 2 │ │ green ┆ 1 │ └───────┴─────┘ Return the count as a relative frequency, normalized to 1.0: >>> s.value_counts(sort=True, normalize=True, name="fraction") shape: (3, 2) ┌───────┬──────────┐ │ color ┆ fraction │ │ --- ┆ --- │ │ str ┆ f64 │ ╞═══════╪══════════╡ │ blue ┆ 0.5 │ │ red ┆ 0.333333 │ │ green ┆ 0.166667 │ └───────┴──────────┘ """ name = name or ("proportion" if normalize else "count") return pl.DataFrame._from_pydf( self._s.value_counts( sort=sort, parallel=parallel, name=name, normalize=normalize ) ) def unique_counts(self) -> Series: """ Return a count of the unique values in the order of appearance. Examples -------- >>> s = pl.Series("id", ["a", "b", "b", "c", "c", "c"]) >>> s.unique_counts() shape: (3,) Series: 'id' [u32] [ 1 2 3 ] """ def entropy(self, base: float = math.e, *, normalize: bool = True) -> float | None: """ Compute the entropy. Uses the formula `-sum(pk * log(pk))` where `pk` are discrete probabilities. Parameters ---------- base The base of the logarithm; defaults to `e`. normalize Normalize `pk` if it doesn't sum to 1.
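Notes ----- Because `pk` is normalized by default, count data produces the same entropy as the corresponding probabilities: >>> pl.Series([1, 1, 2]).entropy() == pl.Series([0.25, 0.25, 0.5]).entropy() True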
Examples -------- >>> a = pl.Series([0.99, 0.005, 0.005]) >>> a.entropy(normalize=True) 0.06293300616044681 >>> b = pl.Series([0.65, 0.10, 0.25]) >>> b.entropy(normalize=True) 0.8568409950394724 """ return ( self.to_frame() .select_seq(F.col(self.name).entropy(base, normalize=normalize)) .to_series() .item() ) @unstable() @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def cumulative_eval( self, expr: Expr, *, min_samples: int = 1, parallel: bool = False ) -> Series: """ Run an expression over a sliding window that increases by `1` slot every iteration. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`. Parameters ---------- expr Expression to evaluate. min_samples Number of valid values there should be in the window before the expression is evaluated. Valid values are computed as `length - null_count`. parallel Run in parallel. Don't do this in a group by or another operation that already has much parallelization. Warnings -------- This can be really slow as it can have `O(n^2)` complexity. Don't use this for operations that visit all elements. Examples -------- >>> s = pl.Series("values", [1, 2, 3, 4, 5]) >>> s.cumulative_eval(pl.element().first() - pl.element().last() ** 2) shape: (5,) Series: 'values' [i64] [ 0 -3 -8 -15 -24 ] """ def alias(self, name: str) -> Series: """ Rename the series. Parameters ---------- name The new name. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.alias("b") shape: (3,) Series: 'b' [i64] [ 1 2 3 ] """ s = self.clone() s._s.rename(name) return s def rename(self, name: str) -> Series: """ Rename this Series. Alias for :func:`Series.alias`. Parameters ---------- name New name. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.rename("b") shape: (3,) Series: 'b' [i64] [ 1 2 3 ] """ return self.alias(name) def chunk_lengths(self) -> list[int]: """ Get the length of each individual chunk. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s2 = pl.Series("a", [4, 5, 6]) Concatenate Series with rechunk = True >>> pl.concat([s, s2], rechunk=True).chunk_lengths() [6] Concatenate Series with rechunk = False >>> pl.concat([s, s2], rechunk=False).chunk_lengths() [3, 3] """ return self._s.chunk_lengths() def n_chunks(self) -> int: """ Get the number of chunks that this Series contains. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.n_chunks() 1 >>> s2 = pl.Series("a", [4, 5, 6]) Concatenate Series with rechunk = True >>> pl.concat([s, s2], rechunk=True).n_chunks() 1 Concatenate Series with rechunk = False >>> pl.concat([s, s2], rechunk=False).n_chunks() 2 """ return self._s.n_chunks() def cum_max(self, *, reverse: bool = False) -> Series: """ Get an array with the cumulative max computed at every element. Parameters ---------- reverse Reverse the operation. Examples -------- >>> s = pl.Series("s", [3, 5, 1]) >>> s.cum_max() shape: (3,) Series: 's' [i64] [ 3 5 5 ] """ def cum_min(self, *, reverse: bool = False) -> Series: """ Get an array with the cumulative min computed at every element. Parameters ---------- reverse Reverse the operation. Examples -------- >>> s = pl.Series("s", [1, 2, 3]) >>> s.cum_min() shape: (3,) Series: 's' [i64] [ 1 1 1 ] """ def cum_prod(self, *, reverse: bool = False) -> Series: """ Get an array with the cumulative product computed at every element. Parameters ---------- reverse Reverse the operation.
Notes ----- Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before multiplying to prevent overflow issues. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.cum_prod() shape: (3,) Series: 'a' [i64] [ 1 2 6 ] """ def cum_sum(self, *, reverse: bool = False) -> Series: """ Get an array with the cumulative sum computed at every element. Parameters ---------- reverse Reverse the operation. Notes ----- Dtypes in {Int8, UInt8, Int16, UInt16} are cast to Int64 before summing to prevent overflow issues. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.cum_sum() shape: (3,) Series: 'a' [i64] [ 1 3 6 ] """ def cum_count(self, *, reverse: bool = False) -> Self: """ Return the cumulative count of the non-null values in the column. Parameters ---------- reverse Reverse the operation. Examples -------- >>> s = pl.Series(["x", "k", None, "d"]) >>> s.cum_count() shape: (4,) Series: '' [u32] [ 1 2 2 3 ] """ def slice(self, offset: int, length: int | None = None) -> Series: """ Get a slice of this Series. Parameters ---------- offset Start index. Negative indexing is supported. length Length of the slice. If set to `None`, all rows starting at the offset will be selected. Examples -------- >>> s = pl.Series("a", [1, 2, 3, 4]) >>> s.slice(1, 2) shape: (2,) Series: 'a' [i64] [ 2 3 ] """ return self._from_pyseries(self._s.slice(offset=offset, length=length)) def append(self, other: Series) -> Self: """ Append a Series to this one. The resulting series will consist of multiple chunks. Parameters ---------- other Series to append. Warnings -------- This method modifies the series in-place. The series is returned for convenience only. See Also -------- extend Examples -------- >>> a = pl.Series("a", [1, 2, 3]) >>> b = pl.Series("b", [4, 5]) >>> a.append(b) shape: (5,) Series: 'a' [i64] [ 1 2 3 4 5 ] The resulting series will consist of multiple chunks. >>> a.n_chunks() 2 """ require_same_type(self, other) self._s.append(other._s) return self def extend(self, other: Series) -> Self: """ Extend the memory backed by this Series with the values from another. Different from `append`, which adds the chunks from `other` to the chunks of this series, `extend` appends the data from `other` to the underlying memory locations and thus may cause a reallocation (which is expensive). If this does `not` cause a reallocation, the resulting data structure will not have any extra chunks and thus will yield faster queries. Prefer `extend` over `append` when you want to do a query after a single append. For instance, during online operations where you add `n` rows and rerun a query. Prefer `append` over `extend` when you want to append many times before doing a query. For instance, when you read in multiple files and want to store them in a single `Series`. In the latter case, finish the sequence of `append` operations with a `rechunk`. Parameters ---------- other Series to extend the series with. Warnings -------- This method modifies the series in-place. The series is returned for convenience only. See Also -------- append Examples -------- >>> a = pl.Series("a", [1, 2, 3]) >>> b = pl.Series("b", [4, 5]) >>> a.extend(b) shape: (5,) Series: 'a' [i64] [ 1 2 3 4 5 ] The resulting series will consist of a single chunk. >>> a.n_chunks() 1 """ require_same_type(self, other) self._s.extend(other._s) return self def filter(self, predicate: Series | Iterable[bool]) -> Self: """ Filter elements by a boolean mask. The original order of the remaining elements is preserved.
Elements where the filter does not evaluate to True are discarded, including nulls. Parameters ---------- predicate Boolean mask. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> mask = pl.Series("", [True, False, True]) >>> s.filter(mask) shape: (2,) Series: 'a' [i64] [ 1 3 ] """ if not isinstance(predicate, Series): predicate = Series("", predicate) return self._from_pyseries(self._s.filter(predicate._s)) def head(self, n: int = 10) -> Series: """ Get the first `n` elements. Parameters ---------- n Number of elements to return. If a negative value is passed, return all elements except the last `abs(n)`. See Also -------- tail, slice Examples -------- >>> s = pl.Series("a", [1, 2, 3, 4, 5]) >>> s.head(3) shape: (3,) Series: 'a' [i64] [ 1 2 3 ] Pass a negative value to get all rows `except` the last `abs(n)`. >>> s.head(-3) shape: (2,) Series: 'a' [i64] [ 1 2 ] """ if n < 0: n = max(0, self.len() + n) return self._from_pyseries(self._s.head(n)) def tail(self, n: int = 10) -> Series: """ Get the last `n` elements. Parameters ---------- n Number of elements to return. If a negative value is passed, return all elements except the first `abs(n)`. See Also -------- head, slice Examples -------- >>> s = pl.Series("a", [1, 2, 3, 4, 5]) >>> s.tail(3) shape: (3,) Series: 'a' [i64] [ 3 4 5 ] Pass a negative value to get all rows `except` the first `abs(n)`. >>> s.tail(-3) shape: (2,) Series: 'a' [i64] [ 4 5 ] """ if n < 0: n = max(0, self.len() + n) return self._from_pyseries(self._s.tail(n)) def limit(self, n: int = 10) -> Series: """ Get the first `n` elements. Alias for :func:`Series.head`. Parameters ---------- n Number of elements to return. If a negative value is passed, return all elements except the last `abs(n)`. See Also -------- head Examples -------- >>> s = pl.Series("a", [1, 2, 3, 4, 5]) >>> s.limit(3) shape: (3,) Series: 'a' [i64] [ 1 2 3 ] Pass a negative value to get all rows `except` the last `abs(n)`. >>> s.limit(-3) shape: (2,) Series: 'a' [i64] [ 1 2 ] """ return self.head(n) def gather_every(self, n: int, offset: int = 0) -> Series: """ Take every nth value in the Series and return the result as a new Series. Parameters ---------- n Gather every *n*-th row. offset Start the row index at this offset. Examples -------- >>> s = pl.Series("a", [1, 2, 3, 4]) >>> s.gather_every(2) shape: (2,) Series: 'a' [i64] [ 1 3 ] >>> s.gather_every(2, offset=1) shape: (2,) Series: 'a' [i64] [ 2 4 ] """ def sort( self, *, descending: bool = False, nulls_last: bool = False, multithreaded: bool = True, in_place: bool = False, ) -> Self: """ Sort this Series. Parameters ---------- descending Sort in descending order. nulls_last Place null values last instead of first. multithreaded Sort using multiple threads. in_place Sort in-place. Examples -------- >>> s = pl.Series("a", [1, 3, 4, 2]) >>> s.sort() shape: (4,) Series: 'a' [i64] [ 1 2 3 4 ] >>> s.sort(descending=True) shape: (4,) Series: 'a' [i64] [ 4 3 2 1 ] """ if in_place: self._s = self._s.sort(descending, nulls_last, multithreaded) return self else: return self._from_pyseries( self._s.sort(descending, nulls_last, multithreaded) ) def top_k(self, k: int = 5) -> Series: r""" Return the `k` largest elements. Non-null elements are always preferred over null elements. The output is not guaranteed to be in any particular order; call :func:`sort` after this function if you wish the output to be sorted. This has time complexity: .. math:: O(n) Parameters ---------- k Number of elements to return.
See Also -------- top_k_by bottom_k bottom_k_by Examples -------- >>> s = pl.Series("a", [2, 5, 1, 4, 3]) >>> s.top_k(3) shape: (3,) Series: 'a' [i64] [ 5 4 3 ] """ def top_k_by( self, by: IntoExpr | Iterable[IntoExpr], k: int = 5, *, reverse: bool | Sequence[bool] = False, ) -> Series: r""" Return the `k` largest elements of the `by` column. Non-null elements are always preferred over null elements, regardless of the value of `reverse`. The output is not guaranteed to be in any particular order; call :func:`sort` after this function if you wish the output to be sorted. This has time complexity: .. math:: O(n \log{n}) Parameters ---------- by Column used to determine the largest elements. Accepts expression input. Strings are parsed as column names. k Number of elements to return. reverse Consider the `k` smallest elements of the `by` column (instead of the `k` largest). This can be specified per column by passing a sequence of booleans. See Also -------- top_k bottom_k bottom_k_by Examples -------- >>> s = pl.Series("a", [2, 5, 1, 4, 3]) >>> s.top_k_by("a", 3) shape: (3,) Series: 'a' [i64] [ 5 4 3 ] """ def bottom_k(self, k: int = 5) -> Series: r""" Return the `k` smallest elements. Non-null elements are always preferred over null elements. The output is not guaranteed to be in any particular order; call :func:`sort` after this function if you wish the output to be sorted. This has time complexity: .. math:: O(n) Parameters ---------- k Number of elements to return. See Also -------- top_k top_k_by bottom_k_by Examples -------- >>> s = pl.Series("a", [2, 5, 1, 4, 3]) >>> s.bottom_k(3) shape: (3,) Series: 'a' [i64] [ 1 2 3 ] """ def bottom_k_by( self, by: IntoExpr | Iterable[IntoExpr], k: int = 5, *, reverse: bool | Sequence[bool] = False, ) -> Series: r""" Return the `k` smallest elements of the `by` column. Non-null elements are always preferred over null elements, regardless of the value of `reverse`. The output is not guaranteed to be in any particular order; call :func:`sort` after this function if you wish the output to be sorted. This has time complexity: .. math:: O(n \log{n}) Parameters ---------- by Column used to determine the smallest elements. Accepts expression input. Strings are parsed as column names. k Number of elements to return. reverse Consider the `k` largest elements of the `by` column (instead of the `k` smallest). This can be specified per column by passing a sequence of booleans. See Also -------- top_k top_k_by bottom_k Examples -------- >>> s = pl.Series("a", [2, 5, 1, 4, 3]) >>> s.bottom_k_by("a", 3) shape: (3,) Series: 'a' [i64] [ 1 2 3 ] """ def arg_sort(self, *, descending: bool = False, nulls_last: bool = False) -> Series: """ Get the index values that would sort this Series. Parameters ---------- descending Sort in descending order. nulls_last Place null values last instead of first. See Also -------- Series.gather: Take values by index. Series.rank : Get the rank of each row. Examples -------- >>> s = pl.Series("a", [5, 3, 4, 1, 2]) >>> s.arg_sort() shape: (5,) Series: 'a' [u32] [ 3 4 1 2 0 ] """ def arg_unique(self) -> Series: """ Get the index of the first occurrence of each unique value, as a Series. Returns ------- Series Examples -------- >>> s = pl.Series("a", [1, 2, 2, 3]) >>> s.arg_unique() shape: (3,) Series: 'a' [u32] [ 0 1 3 ] """ def arg_min(self) -> int | None: """ Get the index of the minimal value.
Returns ------- int Examples -------- >>> s = pl.Series("a", [3, 2, 1]) >>> s.arg_min() 2 """ return self._s.arg_min() def arg_max(self) -> int | None: """ Get the index of the maximal value. Returns ------- int Examples -------- >>> s = pl.Series("a", [3, 2, 1]) >>> s.arg_max() 0 """ return self._s.arg_max() @overload def search_sorted( self, element: NonNestedLiteral | None, side: SearchSortedSide = ..., *, descending: bool = ..., ) -> int: ... @overload def search_sorted( self, element: list[NonNestedLiteral | None] | np.ndarray[Any, Any] | Expr | Series, side: SearchSortedSide = ..., *, descending: bool = ..., ) -> Series: ... def search_sorted( self, element: IntoExpr | np.ndarray[Any, Any] | None, side: SearchSortedSide = "any", *, descending: bool = False, ) -> int | Series: """ Find indices where elements should be inserted to maintain order. .. math:: a[i-1] < v <= a[i] Parameters ---------- element Expression or scalar value. side : {'any', 'left', 'right'} If 'any', the index of the first suitable location found is given. If 'left', the index of the leftmost suitable location found is given. If 'right', the index of the rightmost suitable location found is given. descending Boolean indicating whether the values are descending or not (they are required to be sorted either way). Examples -------- >>> s = pl.Series("set", [1, 2, 3, 4, 4, 5, 6, 7]) >>> s.search_sorted(4) 3 >>> s.search_sorted(4, "left") 3 >>> s.search_sorted(4, "right") 5 >>> s.search_sorted([1, 4, 5]) shape: (3,) Series: 'set' [u32] [ 0 3 5 ] >>> s.search_sorted([1, 4, 5], "left") shape: (3,) Series: 'set' [u32] [ 0 3 5 ] >>> s.search_sorted([1, 4, 5], "right") shape: (3,) Series: 'set' [u32] [ 1 5 6 ] """ df = F.select(F.lit(self).search_sorted(element, side, descending=descending)) if isinstance(element, (list, Series, pl.Expr)): return df.to_series() elif _check_for_numpy(element) and isinstance(element, np.ndarray): return df.to_series() else: return df.item() def unique(self, *, maintain_order: bool = False) -> Series: """ Get unique elements in series. Parameters ---------- maintain_order Maintain order of data. This requires more work. Examples -------- >>> s = pl.Series("a", [1, 2, 2, 3]) >>> s.unique().sort() shape: (3,) Series: 'a' [i64] [ 1 2 3 ] """ def gather( self, indices: int | list[int] | Expr | Series | np.ndarray[Any, Any] ) -> Series: """ Take values by index. Parameters ---------- indices Index location used for selection. Examples -------- >>> s = pl.Series("a", [1, 2, 3, 4]) >>> s.gather([1, 3]) shape: (2,) Series: 'a' [i64] [ 2 4 ] """ def null_count(self) -> int: """ Count the null values in this Series. Examples -------- >>> s = pl.Series([1, None, None]) >>> s.null_count() 2 """ return self._s.null_count() def has_nulls(self) -> bool: """ Check whether the Series contains one or more null values. Examples -------- >>> s = pl.Series([1, 2, None]) >>> s.has_nulls() True >>> s[:2].has_nulls() False """ return self._s.has_nulls() @deprecated( "`has_validity` is deprecated; use `has_nulls` " "instead to check for the presence of null values." ) def has_validity(self) -> bool: """ Check whether the Series contains one or more null values. .. deprecated:: 0.20.30 Use the :meth:`has_nulls` method instead. """ return self._s.has_nulls() def is_empty(self) -> bool: """ Check if the Series is empty.
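Notes ----- Equivalent to `s.len() == 0`; note that a Series containing only null values is not empty: >>> pl.Series("a", [None]).is_empty() False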
Examples -------- >>> s = pl.Series("a", [], dtype=pl.Float32) >>> s.is_empty() True """ return self.len() == 0 def is_sorted(self, *, descending: bool = False, nulls_last: bool = False) -> bool: """ Check if the Series is sorted. Parameters ---------- descending Check if the Series is sorted in descending order nulls_last Set nulls at the end of the Series in sorted check. Examples -------- >>> s = pl.Series([1, 3, 2]) >>> s.is_sorted() False >>> s = pl.Series([3, 2, 1]) >>> s.is_sorted(descending=True) True """ return self._s.is_sorted(descending, nulls_last) def not_(self) -> Series: """ Negate a boolean Series. Returns ------- Series Series of data type :class:`Boolean`. Examples -------- >>> s = pl.Series("a", [True, False, False]) >>> s.not_() shape: (3,) Series: 'a' [bool] [ false true true ] """ return self._from_pyseries(self._s.not_()) def is_null(self) -> Series: """ Returns a boolean Series indicating which values are null. Returns ------- Series Series of data type :class:`Boolean`. Examples -------- >>> s = pl.Series("a", [1.0, 2.0, 3.0, None]) >>> s.is_null() shape: (4,) Series: 'a' [bool] [ false false false true ] """ def is_not_null(self) -> Series: """ Returns a boolean Series indicating which values are not null. Returns ------- Series Series of data type :class:`Boolean`. Examples -------- >>> s = pl.Series("a", [1.0, 2.0, 3.0, None]) >>> s.is_not_null() shape: (4,) Series: 'a' [bool] [ true true true false ] """ def is_finite(self) -> Series: """ Returns a boolean Series indicating which values are finite. Returns ------- Series Series of data type :class:`Boolean`. Examples -------- >>> import numpy as np >>> s = pl.Series("a", [1.0, 2.0, np.inf]) >>> s.is_finite() shape: (3,) Series: 'a' [bool] [ true true false ] """ def is_infinite(self) -> Series: """ Returns a boolean Series indicating which values are infinite. Returns ------- Series Series of data type :class:`Boolean`. Examples -------- >>> import numpy as np >>> s = pl.Series("a", [1.0, 2.0, np.inf]) >>> s.is_infinite() shape: (3,) Series: 'a' [bool] [ false false true ] """ def is_nan(self) -> Series: """ Returns a boolean Series indicating which values are NaN. Returns ------- Series Series of data type :class:`Boolean`. Examples -------- >>> import numpy as np >>> s = pl.Series("a", [1.0, 2.0, 3.0, np.nan]) >>> s.is_nan() shape: (4,) Series: 'a' [bool] [ false false false true ] """ def is_not_nan(self) -> Series: """ Returns a boolean Series indicating which values are not NaN. Returns ------- Series Series of data type :class:`Boolean`. Examples -------- >>> import numpy as np >>> s = pl.Series("a", [1.0, 2.0, 3.0, np.nan]) >>> s.is_not_nan() shape: (4,) Series: 'a' [bool] [ true true true false ] """ def is_in( self, other: Series | Collection[Any], *, nulls_equal: bool = False, ) -> Series: """ Check if elements of this Series are in the other Series. Parameters ---------- other A Series or collection to search in. nulls_equal : bool, default False If True, treat null as a distinct value. Null values will not propagate. Returns ------- Series Series of data type :class:`Boolean`. 
Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s2 = pl.Series("b", [2, 4, None]) >>> s2.is_in(s) shape: (3,) Series: 'b' [bool] [ true false null ] >>> # when nulls_equal=True, None is treated as a distinct value >>> s2.is_in(s, nulls_equal=True) shape: (3,) Series: 'b' [bool] [ true false false ] >>> # check if some values are a member of sublists >>> sets = pl.Series("sets", [[1, 2, 3], [1, 2], [9, 10]]) >>> optional_members = pl.Series("optional_members", [1, 2, 3]) >>> print(sets) shape: (3,) Series: 'sets' [list[i64]] [ [1, 2, 3] [1, 2] [9, 10] ] >>> print(optional_members) shape: (3,) Series: 'optional_members' [i64] [ 1 2 3 ] >>> optional_members.is_in(sets) shape: (3,) Series: 'optional_members' [bool] [ true true false ] """ def arg_true(self) -> Series: """ Get index values where Boolean Series evaluate True. Returns ------- Series Series of data type :class:`UInt32`. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> (s == 2).arg_true() shape: (1,) Series: 'a' [u32] [ 1 ] """ return F.arg_where(self, eager=True) def is_unique(self) -> Series: """ Get mask of all unique values. Returns ------- Series Series of data type :class:`Boolean`. Examples -------- >>> s = pl.Series("a", [1, 2, 2, 3]) >>> s.is_unique() shape: (4,) Series: 'a' [bool] [ true false false true ] """ def is_first_distinct(self) -> Series: """ Return a boolean mask indicating the first occurrence of each distinct value. Returns ------- Series Series of data type :class:`Boolean`. Examples -------- >>> s = pl.Series([1, 1, 2, 3, 2]) >>> s.is_first_distinct() shape: (5,) Series: '' [bool] [ true false true true false ] """ def is_last_distinct(self) -> Series: """ Return a boolean mask indicating the last occurrence of each distinct value. Returns ------- Series Series of data type :class:`Boolean`. Examples -------- >>> s = pl.Series([1, 1, 2, 3, 2]) >>> s.is_last_distinct() shape: (5,) Series: '' [bool] [ false true false true true ] """ def is_duplicated(self) -> Series: """ Get mask of all duplicated values. Returns ------- Series Series of data type :class:`Boolean`. Examples -------- >>> s = pl.Series("a", [1, 2, 2, 3]) >>> s.is_duplicated() shape: (4,) Series: 'a' [bool] [ false true true false ] """ def explode(self) -> Series: """ Explode a list Series. This means that every item is expanded to a new row. Returns ------- Series Series with the data type of the list elements. See Also -------- Series.list.explode : Explode a list column. Examples -------- >>> s = pl.Series("a", [[1, 2, 3], [4, 5, 6]]) >>> s shape: (2,) Series: 'a' [list[i64]] [ [1, 2, 3] [4, 5, 6] ] >>> s.explode() shape: (6,) Series: 'a' [i64] [ 1 2 3 4 5 6 ] """ @deprecate_renamed_parameter("strict", "check_dtypes", version="0.20.31") def equals( self, other: Series, *, check_dtypes: bool = False, check_names: bool = False, null_equal: bool = True, ) -> bool: """ Check whether the Series is equal to another Series. .. versionchanged:: 0.20.31 The `strict` parameter was renamed `check_dtypes`. Parameters ---------- other Series to compare with. check_dtypes Require data types to match. check_names Require names to match. null_equal Consider null values as equal. 
See Also -------- polars.testing.assert_series_equal Examples -------- >>> s1 = pl.Series("a", [1, 2, 3]) >>> s2 = pl.Series("b", [4, 5, 6]) >>> s1.equals(s1) True >>> s1.equals(s2) False """ require_same_type(self, other) return self._s.equals( other._s, check_dtypes=check_dtypes, check_names=check_names, null_equal=null_equal, ) def cast( self, dtype: type[int | float | str | bool] | PolarsDataType, *, strict: bool = True, wrap_numerical: bool = False, ) -> Self: r""" Cast between data types. Parameters ---------- dtype DataType to cast to. strict If True, invalid casts generate exceptions instead of `null`\s. wrap_numerical If True, numeric casts wrap overflowing values instead of marking the cast as invalid. Examples -------- >>> s = pl.Series("a", [True, False, True]) >>> s shape: (3,) Series: 'a' [bool] [ true false true ] >>> s.cast(pl.UInt32) shape: (3,) Series: 'a' [u32] [ 1 0 1 ] """ # Do not dispatch cast as it is expensive and used in other functions. dtype = parse_into_dtype(dtype) return self._from_pyseries(self._s.cast(dtype, strict, wrap_numerical)) def to_physical(self) -> Series: """ Cast to physical representation of the logical dtype. - :func:`polars.datatypes.Date` -> :func:`polars.datatypes.Int32` - :func:`polars.datatypes.Datetime` -> :func:`polars.datatypes.Int64` - :func:`polars.datatypes.Time` -> :func:`polars.datatypes.Int64` - :func:`polars.datatypes.Duration` -> :func:`polars.datatypes.Int64` - :func:`polars.datatypes.Categorical` -> :func:`polars.datatypes.UInt32` - `List(inner)` -> `List(physical of inner)` - `Array(inner)` -> `Array(physical of inner)` - `Struct(fields)` -> `Struct(physical of fields)` - Other data types will be left unchanged. Warnings -------- The physical representations are an implementation detail and not guaranteed to be stable. Examples -------- Replicating the pandas `pd.Series.factorize <https://pandas.pydata.org/docs/reference/api/pandas.Series.factorize.html>`_ method. >>> s = pl.Series("values", ["a", None, "x", "a"]) >>> s.cast(pl.Categorical).to_physical() shape: (4,) Series: 'values' [u32] [ 0 null 1 0 ] """ def to_list(self) -> list[Any]: """ Convert this Series to a Python list. This operation copies data. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.to_list() [1, 2, 3] >>> type(s.to_list()) <class 'list'> """ return self._s.to_list() def rechunk(self, *, in_place: bool = False) -> Self: """ Create a single chunk of memory for this Series. Parameters ---------- in_place In place or not. Examples -------- >>> s1 = pl.Series("a", [1, 2, 3]) >>> s1.n_chunks() 1 >>> s2 = pl.Series("a", [4, 5, 6]) >>> s = pl.concat([s1, s2], rechunk=False) >>> s.n_chunks() 2 >>> s.rechunk(in_place=True) shape: (6,) Series: 'a' [i64] [ 1 2 3 4 5 6 ] >>> s.n_chunks() 1 """ opt_s = self._s.rechunk(in_place) if in_place: return self else: assert opt_s is not None return self._from_pyseries(opt_s) def reverse(self) -> Series: """ Return Series in reverse order. Examples -------- >>> s = pl.Series("a", [1, 2, 3], dtype=pl.Int8) >>> s.reverse() shape: (3,) Series: 'a' [i8] [ 3 2 1 ] """ def is_between( self, lower_bound: IntoExpr, upper_bound: IntoExpr, closed: ClosedInterval = "both", ) -> Series: """ Get a boolean mask of the values that are between the given lower/upper bounds. Parameters ---------- lower_bound Lower bound value. Accepts expression input. Non-expression inputs (including strings) are parsed as literals. upper_bound Upper bound value. Accepts expression input. Non-expression inputs (including strings) are parsed as literals.
closed : {'both', 'left', 'right', 'none'} Define which sides of the interval are closed (inclusive). Notes ----- If the value of the `lower_bound` is greater than that of the `upper_bound` then the result will be False, as no value can satisfy the condition. Examples -------- >>> s = pl.Series("num", [1, 2, 3, 4, 5]) >>> s.is_between(2, 4) shape: (5,) Series: 'num' [bool] [ false true true true false ] Use the `closed` argument to include or exclude the values at the bounds: >>> s.is_between(2, 4, closed="left") shape: (5,) Series: 'num' [bool] [ false true true false false ] You can also use strings as well as numeric/temporal values: >>> s = pl.Series("s", ["a", "b", "c", "d", "e"]) >>> s.is_between("b", "d", closed="both") shape: (5,) Series: 's' [bool] [ false true true true false ] """ if closed == "none": out = (self > lower_bound) & (self < upper_bound) elif closed == "both": out = (self >= lower_bound) & (self <= upper_bound) elif closed == "right": out = (self > lower_bound) & (self <= upper_bound) elif closed == "left": out = (self >= lower_bound) & (self < upper_bound) if isinstance(out, pl.Expr): out = F.select(out).to_series() return out def is_close( self, other: IntoExpr, *, abs_tol: float = 0.0, rel_tol: float = 1e-09, nans_equal: bool = False, ) -> Series: r""" Get a boolean mask of the values being close to the other values. Two values `a` and `b` are considered close if the following condition holds: .. math:: |a-b| \le \max\{ \text{rel\_tol} \cdot \max\{ |a|, |b| \}, \text{abs\_tol} \} Parameters ---------- other A literal or expression value to compare with. abs_tol Absolute tolerance. This is the maximum allowed absolute difference between two values. Must be non-negative. rel_tol Relative tolerance. This is the maximum allowed difference between two values, relative to the larger absolute value. Must be non-negative. nans_equal Whether NaN values should be considered equal. Returns ------- Series Series of data type :class:`Boolean`. Notes ----- The implementation of this method is symmetric and mirrors the behavior of :meth:`math.isclose`. Specifically note that this behavior is different to :meth:`numpy.isclose`. Examples -------- >>> s = pl.Series("s", [1.0, 1.2, 1.4, 1.45, 1.6]) >>> s.is_close(1.4, abs_tol=0.1) shape: (5,) Series: 's' [bool] [ false false true true false ] """ return F.select( F.lit(self).is_close( other, abs_tol=abs_tol, rel_tol=rel_tol, nans_equal=nans_equal ) ).to_series() def to_numpy( self, *, writable: bool = False, allow_copy: bool = True, use_pyarrow: bool | None = None, zero_copy_only: bool | None = None, ) -> np.ndarray[Any, Any]: """ Convert this Series to a NumPy ndarray. This operation copies data only when necessary. The conversion is zero copy when all of the following hold: - The data type is an integer, float, `Datetime`, `Duration`, or `Array`. - The Series contains no null values. - The Series consists of a single chunk. - The `writable` parameter is set to `False` (default). Parameters ---------- writable Ensure the resulting array is writable. This will force a copy of the data if the array was created without copy as the underlying Arrow data is immutable. allow_copy Allow memory to be copied to perform the conversion. If set to `False`, causes conversions that are not zero-copy to fail. use_pyarrow First convert to PyArrow, then call `pyarrow.Array.to_numpy <https://arrow.apache.org/docs/python/generated/pyarrow.Array.html#pyarrow.Array.to_numpy>`_ to convert to NumPy. If set to `False`, Polars' own conversion logic is used. .. 
deprecated:: 0.20.28 Polars now uses its native engine by default for conversion to NumPy. To use PyArrow's engine, call `.to_arrow().to_numpy()` instead. zero_copy_only Raise an exception if the conversion to a NumPy array would require copying the underlying data. Data copy occurs, for example, when the Series contains nulls or non-numeric types. .. deprecated:: 0.20.10 Use the `allow_copy` parameter instead, which is the inverse of this one. Examples -------- Numeric data without nulls can be converted without copying data. The resulting array will not be writable. >>> s = pl.Series([1, 2, 3], dtype=pl.Int8) >>> arr = s.to_numpy() >>> arr array([1, 2, 3], dtype=int8) >>> arr.flags.writeable False Set `writable=True` to force data copy to make the array writable. >>> s.to_numpy(writable=True).flags.writeable True Integer Series containing nulls will be cast to a float type with `nan` representing a null value. This requires data to be copied. >>> s = pl.Series([1, 2, None], dtype=pl.UInt16) >>> s.to_numpy() array([ 1., 2., nan], dtype=float32) Set `allow_copy=False` to raise an error if data would be copied. >>> s.to_numpy(allow_copy=False) # doctest: +SKIP Traceback (most recent call last): ... RuntimeError: copy not allowed: cannot convert to a NumPy array without copying data Series of data type `Array` and `Struct` will result in an array with more than one dimension. >>> s = pl.Series([[1, 2, 3], [4, 5, 6]], dtype=pl.Array(pl.Int64, 3)) >>> s.to_numpy() array([[1, 2, 3], [4, 5, 6]]) """ # noqa: W505 if zero_copy_only is not None: issue_deprecation_warning( "the `zero_copy_only` parameter for `Series.to_numpy` is deprecated." " Use the `allow_copy` parameter instead, which is the inverse of `zero_copy_only`.", version="0.20.10", ) allow_copy = not zero_copy_only if use_pyarrow is not None: issue_deprecation_warning( "the `use_pyarrow` parameter for `Series.to_numpy` is deprecated." " Polars now uses its native engine for conversion to NumPy by default." " To use PyArrow's engine, call `.to_arrow().to_numpy()` instead.", version="0.20.28", ) else: use_pyarrow = False if ( use_pyarrow and _PYARROW_AVAILABLE and self.dtype not in (Date, Datetime, Duration, Array, Object) ): if not allow_copy and self.n_chunks() > 1 and not self.is_empty(): msg = "cannot return a zero-copy array" raise ValueError(msg) return self.to_arrow().to_numpy( zero_copy_only=not allow_copy, writable=writable ) return self._s.to_numpy(writable=writable, allow_copy=allow_copy) @unstable() def to_jax(self, device: jax.Device | str | None = None) -> jax.Array: """ Convert this Series to a Jax Array. .. versionadded:: 0.20.27 .. warning:: This functionality is currently considered **unstable**. It may be changed at any point without it being considered a breaking change. Parameters ---------- device Specify the jax `Device` on which the array will be created; can provide a string (such as "cpu", "gpu", or "tpu") in which case the device is retrieved as `jax.devices(string)[0]`. For more specific control you can supply the instantiated `Device` directly. If None, arrays are created on the default device. Examples -------- >>> s = pl.Series("x", [10.5, 0.0, -10.0, 5.5]) >>> s.to_jax() Array([ 10.5, 0. , -10. 
, 5.5], dtype=float32) """ jx = import_optional( "jax", install_message="Please see `https://jax.readthedocs.io/en/latest/installation.html` " "for specific installation recommendations for the Jax package", ) if isinstance(device, str): device = jx.devices(device)[0] if ( jx.config.jax_enable_x64 or bool(int(os.environ.get("JAX_ENABLE_X64", "0"))) or self.dtype not in {Float64, Int64, UInt64} ): srs = self else: single_precision = {Float64: Float32, Int64: Int32, UInt64: UInt32} srs = self.cast(single_precision[self.dtype]) # type: ignore[index] with nullcontext() if device is None else jx.default_device(device): return jx.numpy.asarray( # note: jax arrays are immutable, so can avoid a copy (vs torch) a=srs.to_numpy(writable=False), order="K", ) @unstable() def to_torch(self) -> torch.Tensor: """ Convert this Series to a PyTorch Tensor. .. versionadded:: 0.20.23 .. warning:: This functionality is currently considered **unstable**. It may be changed at any point without it being considered a breaking change. Notes ----- PyTorch tensors do not support UInt16, UInt32, or UInt64; these dtypes will be automatically cast to Int32, Int64, and Int64, respectively. Examples -------- >>> s = pl.Series("x", [1, 0, 1, 2, 0], dtype=pl.UInt8) >>> s.to_torch() tensor([1, 0, 1, 2, 0], dtype=torch.uint8) >>> s = pl.Series("x", [5.5, -10.0, 2.5], dtype=pl.Float32) >>> s.to_torch() tensor([ 5.5000, -10.0000, 2.5000]) """ torch = import_optional("torch") # PyTorch tensors do not support uint16/32/64 if self.dtype in (UInt32, UInt64): srs = self.cast(Int64) elif self.dtype == UInt16: srs = self.cast(Int32) else: srs = self # we have to build the tensor from a writable array or PyTorch will complain # about it (writing to a readonly array results in undefined behavior) numpy_array = srs.to_numpy(writable=True) try: tensor = torch.from_numpy(numpy_array) except TypeError: if self.dtype == List: msg = "cannot convert List dtype to Tensor (use Array dtype instead)" raise TypeError(msg) from None raise # note: named tensors are currently experimental # tensor.rename(self.name) return tensor @deprecate_renamed_parameter("future", "compat_level", version="1.1") def to_arrow(self, *, compat_level: CompatLevel | None = None) -> pa.Array: """ Return the underlying Arrow array. If the Series contains only a single chunk this operation is zero copy. .. versionchanged:: 1.24 The `future` parameter was renamed `compat_level`. Parameters ---------- compat_level Use a specific compatibility level when exporting Polars' internal data structures. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s = s.to_arrow() >>> s # doctest: +ELLIPSIS <pyarrow.lib.Int64Array object at ...> [ 1, 2, 3 ] """ compat_level_py: int | bool if compat_level is None: compat_level_py = False elif isinstance(compat_level, CompatLevel): compat_level_py = compat_level._version else: msg = f"`compat_level` has invalid type: {qualified_type_name(compat_level)!r}" raise TypeError(msg) return self._s.to_arrow(compat_level_py) def to_pandas( self, *, use_pyarrow_extension_array: bool = False, **kwargs: Any ) -> pd.Series[Any]: """ Convert this Series to a pandas Series. This operation copies data if `use_pyarrow_extension_array` is not enabled. Parameters ---------- use_pyarrow_extension_array Use a PyArrow-backed extension array instead of a NumPy array for the pandas Series. This allows zero copy operations and preservation of null values. Subsequent operations on the resulting pandas Series may trigger conversion to NumPy if those operations are not supported by PyArrow compute functions.
**kwargs Additional keyword arguments to be passed to :meth:`pyarrow.Array.to_pandas`. Returns ------- :class:`pandas.Series` Notes ----- This operation requires that both :mod:`pandas` and :mod:`pyarrow` are installed. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.to_pandas() 0 1 1 2 2 3 Name: a, dtype: int64 Null values are converted to `NaN`. >>> s = pl.Series("b", [1, 2, None]) >>> s.to_pandas() 0 1.0 1 2.0 2 NaN Name: b, dtype: float64 Pass `use_pyarrow_extension_array=True` to get a pandas Series backed by a PyArrow extension array. This will preserve null values. >>> s.to_pandas(use_pyarrow_extension_array=True) 0 1 1 2 2 <NA> Name: b, dtype: int64[pyarrow] """ if self.dtype == Object: # Can't convert via PyArrow, so do it via NumPy return pd.Series(self.to_numpy(), dtype=object, name=self.name) if use_pyarrow_extension_array: if parse_version(pd.__version__) < (1, 5): msg = f'pandas>=1.5.0 is required for `to_pandas("use_pyarrow_extension_array=True")`, found pandas {pd.__version__}' raise ModuleUpgradeRequiredError(msg) if not _PYARROW_AVAILABLE or parse_version(pa.__version__) < (8, 0): msg = 'pyarrow>=8.0.0 is required for `to_pandas("use_pyarrow_extension_array=True")`' if _PYARROW_AVAILABLE: msg += f", found pyarrow {pa.__version__!r}" raise ModuleUpgradeRequiredError(msg) pa_arr = self.to_arrow() # pandas does not support unsigned dictionary indices if pa.types.is_dictionary(pa_arr.type): pa_arr = pa_arr.cast(pa.dictionary(pa.int64(), pa.large_string())) if use_pyarrow_extension_array: pd_series = pa_arr.to_pandas( self_destruct=True, split_blocks=True, types_mapper=lambda pa_dtype: pd.ArrowDtype(pa_dtype), **kwargs, ) else: date_as_object = kwargs.pop("date_as_object", False) pd_series = pa_arr.to_pandas(date_as_object=date_as_object, **kwargs) pd_series.name = self.name return pd_series def to_init_repr(self, n: int = 1000) -> str: """ Convert Series to instantiable string representation. Parameters ---------- n Only use first n elements. See Also -------- polars.DataFrame.to_init_repr polars.from_repr Examples -------- >>> s = pl.Series("a", [1, 2, None, 4], dtype=pl.Int16) >>> print(s.to_init_repr()) pl.Series('a', [1, 2, None, 4], dtype=pl.Int16) >>> s_from_str_repr = eval(s.to_init_repr()) >>> s_from_str_repr shape: (4,) Series: 'a' [i16] [ 1 2 null 4 ] """ values = self.head(n).to_list() dtype_init_repr = dtype_to_init_repr(self.dtype) return f"pl.Series({self.name!r}, {values}, dtype={dtype_init_repr})" def count(self) -> int: """ Return the number of non-null elements in the column. See Also -------- len Examples -------- >>> s = pl.Series("a", [1, 2, None]) >>> s.count() 2 """ return self.len() - self.null_count() def len(self) -> int: """ Return the number of elements in the Series. Null values count towards the total. See Also -------- count Examples -------- >>> s = pl.Series("a", [1, 2, None]) >>> s.len() 3 """ return self._s.len() def set(self, filter: Series, value: int | float | str | bool | None) -> Series: """ Set masked values. Parameters ---------- filter Boolean mask. value Value with which to replace the masked values. Notes ----- Use of this function is frequently an anti-pattern, as it can block optimisation (predicate pushdown, etc). Consider using `pl.when(predicate).then(value).otherwise(self)` instead. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.set(s == 2, 10) shape: (3,) Series: 'a' [i64] [ 1 10 3 ] It is better to implement this as follows: >>> s.to_frame().select( ... pl.when(pl.col("a") == 2).then(10).otherwise(pl.col("a")) ... 
) shape: (3, 1) ┌─────────┐ │ literal │ │ --- │ │ i64 │ ╞═════════╡ │ 1 │ │ 10 │ │ 3 │ └─────────┘ """ f = get_ffi_func("set_with_mask_<>", self.dtype, self._s) if f is None: msg = f"Series of type {self.dtype} cannot be set" raise NotImplementedError(msg) return self._from_pyseries(f(filter._s, value)) def scatter( self, indices: Series | Iterable[int] | int | np.ndarray[Any, Any], values: Series | Iterable[PythonLiteral] | PythonLiteral | None, ) -> Series: """ Set values at the index locations. Parameters ---------- indices Integers representing the index locations. values Replacement values. Notes ----- Use of this function is frequently an anti-pattern, as it can block optimization (predicate pushdown, etc). Consider using `pl.when(predicate).then(value).otherwise(self)` instead. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.scatter(1, 10) shape: (3,) Series: 'a' [i64] [ 1 10 3 ] It is better to implement this as follows: >>> s.to_frame().with_row_index().select( ... pl.when(pl.col("index") == 1).then(10).otherwise(pl.col("a")) ... ) shape: (3, 1) ┌─────────┐ │ literal │ │ --- │ │ i64 │ ╞═════════╡ │ 1 │ │ 10 │ │ 3 │ └─────────┘ """ if not isinstance(indices, Iterable): index: Any = indices # Workaround for older NumPy versions indices = [index] indices = Series(values=indices) if indices.is_empty(): return self if not isinstance(values, Series): if not isinstance(values, Iterable) or isinstance(values, str): values = [values] values = Series(values=values) self._s.scatter(indices._s, values._s) return self def index_of(self, element: IntoExpr) -> int | None: """ Get the index of the first occurrence of a value, or ``None`` if it's not found. Parameters ---------- element Value to find. Examples -------- >>> s = pl.Series("a", [1, None, 17]) >>> s.index_of(17) 2 >>> s.index_of(None)  # search for a null 1 >>> s.index_of(55) is None True """ return F.select(F.lit(self).index_of(element)).item() def clear(self, n: int = 0) -> Series: """ Create an empty copy of the current Series, with zero to `n` elements. The copy has an identical name/dtype, but no data. Parameters ---------- n Number of (empty) elements to return in the cleared series. See Also -------- clone : Cheap deepcopy/clone. Examples -------- >>> s = pl.Series("a", [None, True, False]) >>> s.clear() shape: (0,) Series: 'a' [bool] [ ] >>> s.clear(n=2) shape: (2,) Series: 'a' [bool] [ null null ] """ if n < 0: msg = f"`n` should be greater than or equal to 0, got {n}" raise ValueError(msg) # faster path if n == 0: return self._from_pyseries(self._s.clear()) s = ( self.__class__(name=self.name, values=[], dtype=self.dtype) if len(self) > 0 else self.clone() ) return s.extend_constant(None, n=n) if n > 0 else s def clone(self) -> Self: """ Create a copy of this Series. This is a cheap operation that does not copy data. See Also -------- clear : Create an empty copy of the current Series, with identical schema but no data. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.clone() shape: (3,) Series: 'a' [i64] [ 1 2 3 ] """ return self._from_pyseries(self._s.clone()) def fill_nan(self, value: int | float | Expr | None) -> Series: """ Fill floating point NaN value with a fill value. Parameters ---------- value Value used to fill NaN values. See Also -------- fill_null Notes ----- A NaN value is not the same as a null value. To fill null values, use :func:`fill_null`.
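Null values are left untouched; only NaN values are replaced: >>> pl.Series([1.0, None, float("nan")]).fill_nan(0.0) shape: (3,) Series: '' [f64] [ 1.0 null 0.0 ]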
Examples -------- >>> s = pl.Series("a", [1.0, 2.0, 3.0, float("nan")]) >>> s.fill_nan(0) shape: (4,) Series: 'a' [f64] [ 1.0 2.0 3.0 0.0 ] """ def fill_null( self, value: Any | Expr | None = None, strategy: FillNullStrategy | None = None, limit: int | None = None, ) -> Series: """ Fill null values using the specified value or strategy. Parameters ---------- value Value used to fill null values. strategy : {None, 'forward', 'backward', 'min', 'max', 'mean', 'zero', 'one'} Strategy used to fill null values. limit Number of consecutive null values to fill when using the 'forward' or 'backward' strategy. See Also -------- backward_fill fill_nan forward_fill Notes ----- A null value is not the same as a NaN value. To fill NaN values, use :func:`fill_nan`. Examples -------- >>> s = pl.Series("a", [1, 2, 3, None]) >>> s.fill_null(strategy="forward") shape: (4,) Series: 'a' [i64] [ 1 2 3 3 ] >>> s.fill_null(strategy="min") shape: (4,) Series: 'a' [i64] [ 1 2 3 1 ] >>> s = pl.Series("b", ["x", None, "z"]) >>> s.fill_null(pl.lit("")) shape: (3,) Series: 'b' [str] [ "x" "" "z" ] """ def backward_fill(self, limit: int | None = None) -> Series: """ Fill missing values with the next non-null value. This is an alias of `.fill_null(strategy="backward")`. Parameters ---------- limit The number of consecutive null values to backward fill. See Also -------- fill_null forward_fill shift """ return self.fill_null(strategy="backward", limit=limit) def forward_fill(self, limit: int | None = None) -> Series: """ Fill missing values with the last non-null value. This is an alias of `.fill_null(strategy="forward")`. Parameters ---------- limit The number of consecutive null values to forward fill. See Also -------- backward_fill fill_null shift """ return self.fill_null(strategy="forward", limit=limit) def floor(self) -> Series: """ Rounds down to the nearest integer value. Only works on floating point Series. Examples -------- >>> s = pl.Series("a", [1.12345, 2.56789, 3.901234]) >>> s.floor() shape: (3,) Series: 'a' [f64] [ 1.0 2.0 3.0 ] """ def ceil(self) -> Series: """ Rounds up to the nearest integer value. Only works on floating point Series. Examples -------- >>> s = pl.Series("a", [1.12345, 2.56789, 3.901234]) >>> s.ceil() shape: (3,) Series: 'a' [f64] [ 2.0 3.0 4.0 ] """ def round(self, decimals: int = 0, mode: RoundMode = "half_to_even") -> Series: """ Round underlying floating point data by `decimals` digits. The default rounding mode is "half to even" (also known as "bankers' rounding"). Parameters ---------- decimals Number of decimals to round by. mode : {'half_to_even', 'half_away_from_zero'} Rounding mode. Examples -------- >>> s = pl.Series("a", [1.12345, 2.56789, 3.901234]) >>> s.round(2) shape: (3,) Series: 'a' [f64] [ 1.12 2.57 3.9 ] >>> s = pl.Series([-3.5, -2.5, -1.5, -0.5, 0.5, 1.5, 2.5, 3.5]) >>> s.round(mode="half_to_even") shape: (8,) Series: '' [f64] [ -4.0 -2.0 -2.0 -0.0 0.0 2.0 2.0 4.0 ] """ def round_sig_figs(self, digits: int) -> Series: """ Round to a number of significant figures. Parameters ---------- digits Number of significant figures to round to. Examples -------- >>> s = pl.Series([0.01234, 3.333, 3450.0]) >>> s.round_sig_figs(2) shape: (3,) Series: '' [f64] [ 0.012 3.3 3500.0 ] """ def dot(self, other: Series | ArrayLike) -> int | float | None: """ Compute the dot/inner product between two Series. 
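Equivalently, for Series `s` and `t` of common length `n`: .. math:: s \cdot t = \sum_{i=0}^{n-1} s_i \, t_i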
Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s2 = pl.Series("b", [4.0, 5.0, 6.0]) >>> s.dot(s2) 32.0 Parameters ---------- other Series (or array) to compute dot product with. """ if not isinstance(other, Series): other = Series(other) if len(self) != len(other): n, m = len(self), len(other) msg = f"Series length mismatch: expected {n!r}, found {m!r}" raise ShapeError(msg) return self._s.dot(other._s) def mode(self) -> Series: """ Compute the most occurring value(s). Can return multiple Values. Examples -------- >>> s = pl.Series("a", [1, 2, 2, 3]) >>> s.mode() shape: (1,) Series: 'a' [i64] [ 2 ] """ def sign(self) -> Series: """ Compute the element-wise sign function on numeric types. The returned value is computed as follows: * -1 if x < 0. * 1 if x > 0. * x otherwise (typically 0, but could be NaN if the input is). Null values are preserved as-is, and the dtype of the input is preserved. Examples -------- >>> s = pl.Series("a", [-9.0, -0.0, 0.0, 4.0, float("nan"), None]) >>> s.sign() shape: (6,) Series: 'a' [f64] [ -1.0 -0.0 0.0 1.0 NaN null ] """ def sin(self) -> Series: """ Compute the element-wise value for the sine. Examples -------- >>> import math >>> s = pl.Series("a", [0.0, math.pi / 2.0, math.pi]) >>> s.sin() shape: (3,) Series: 'a' [f64] [ 0.0 1.0 1.2246e-16 ] """ def cos(self) -> Series: """ Compute the element-wise value for the cosine. Examples -------- >>> import math >>> s = pl.Series("a", [0.0, math.pi / 2.0, math.pi]) >>> s.cos() shape: (3,) Series: 'a' [f64] [ 1.0 6.1232e-17 -1.0 ] """ def tan(self) -> Series: """ Compute the element-wise value for the tangent. Examples -------- >>> import math >>> s = pl.Series("a", [0.0, math.pi / 2.0, math.pi]) >>> s.tan() shape: (3,) Series: 'a' [f64] [ 0.0 1.6331e16 -1.2246e-16 ] """ def cot(self) -> Series: """ Compute the element-wise value for the cotangent. Examples -------- >>> import math >>> s = pl.Series("a", [0.0, math.pi / 2.0, math.pi]) >>> s.cot() shape: (3,) Series: 'a' [f64] [ inf 6.1232e-17 -8.1656e15 ] """ def arcsin(self) -> Series: """ Compute the element-wise value for the inverse sine. Examples -------- >>> s = pl.Series("a", [1.0, 0.0, -1.0]) >>> s.arcsin() shape: (3,) Series: 'a' [f64] [ 1.570796 0.0 -1.570796 ] """ def arccos(self) -> Series: """ Compute the element-wise value for the inverse cosine. Examples -------- >>> s = pl.Series("a", [1.0, 0.0, -1.0]) >>> s.arccos() shape: (3,) Series: 'a' [f64] [ 0.0 1.570796 3.141593 ] """ def arctan(self) -> Series: """ Compute the element-wise value for the inverse tangent. Examples -------- >>> s = pl.Series("a", [1.0, 0.0, -1.0]) >>> s.arctan() shape: (3,) Series: 'a' [f64] [ 0.785398 0.0 -0.785398 ] """ def arcsinh(self) -> Series: """ Compute the element-wise value for the inverse hyperbolic sine. Examples -------- >>> s = pl.Series("a", [1.0, 0.0, -1.0]) >>> s.arcsinh() shape: (3,) Series: 'a' [f64] [ 0.881374 0.0 -0.881374 ] """ def arccosh(self) -> Series: """ Compute the element-wise value for the inverse hyperbolic cosine. Examples -------- >>> s = pl.Series("a", [5.0, 1.0, 0.0, -1.0]) >>> s.arccosh() shape: (4,) Series: 'a' [f64] [ 2.292432 0.0 NaN NaN ] """ def arctanh(self) -> Series: """ Compute the element-wise value for the inverse hyperbolic tangent. Examples -------- >>> s = pl.Series("a", [2.0, 1.0, 0.5, 0.0, -0.5, -1.0, -1.1]) >>> s.arctanh() shape: (7,) Series: 'a' [f64] [ NaN inf 0.549306 0.0 -0.549306 -inf NaN ] """ def sinh(self) -> Series: """ Compute the element-wise value for the hyperbolic sine. 
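Notes ----- The hyperbolic sine is defined as: .. math:: \sinh(x) = \frac{e^{x} - e^{-x}}{2}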
Examples -------- >>> s = pl.Series("a", [1.0, 0.0, -1.0]) >>> s.sinh() shape: (3,) Series: 'a' [f64] [ 1.175201 0.0 -1.175201 ] """ def cosh(self) -> Series: """ Compute the element-wise value for the hyperbolic cosine. Examples -------- >>> s = pl.Series("a", [1.0, 0.0, -1.0]) >>> s.cosh() shape: (3,) Series: 'a' [f64] [ 1.543081 1.0 1.543081 ] """ def tanh(self) -> Series: """ Compute the element-wise value for the hyperbolic tangent. Examples -------- >>> s = pl.Series("a", [1.0, 0.0, -1.0]) >>> s.tanh() shape: (3,) Series: 'a' [f64] [ 0.761594 0.0 -0.761594 ] """ def map_elements( self, function: Callable[[Any], Any], return_dtype: PolarsDataType | None = None, *, skip_nulls: bool = True, ) -> Self: """ Map a custom/user-defined function (UDF) over elements in this Series. .. warning:: This method is much slower than the native expressions API. Only use it if you cannot implement your logic otherwise. Suppose that the function is: `x ↦ sqrt(x)`: - For mapping elements of a series, consider: `s.sqrt()`. - For mapping inner elements of lists, consider: `s.list.eval(pl.element().sqrt())`. - For mapping elements of struct fields, consider: `s.struct.field("field_name").sqrt()`. If the function returns a different datatype, the return_dtype arg should be set, otherwise the method will fail. Implementing logic using a Python function is almost always *significantly* slower and more memory intensive than implementing the same logic using the native expression API because: - The native expression engine runs in Rust; UDFs run in Python. - Use of Python UDFs forces the DataFrame to be materialized in memory. - Polars-native expressions can be parallelised (UDFs typically cannot). - Polars-native expressions can be logically optimised (UDFs cannot). Wherever possible you should strongly prefer the native expression API to achieve the best performance. Parameters ---------- function Custom function or lambda. return_dtype Output datatype. If not set, the dtype will be inferred based on the first non-null value that is returned by the function. skip_nulls Nulls will be skipped and not passed to the python function. This is faster because python can be skipped and because we call more specialized functions. Warnings -------- If `return_dtype` is not provided, this may lead to unexpected results. We allow this, but it is considered a bug in the user's query. Notes ----- * If your function is expensive and you don't want it to be called more than once for a given input, consider applying an `@lru_cache` decorator to it. If your data is suitable you may achieve *significant* speedups. * A UDF passed to `map_elements` must be pure, meaning that it cannot modify or depend on state other than its arguments. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.map_elements(lambda x: x + 10, return_dtype=pl.Int64) # doctest: +SKIP shape: (3,) Series: 'a' [i64] [ 11 12 13 ] Returns ------- Series """ from polars._utils.udfs import warn_on_inefficient_map if return_dtype is None: pl_return_dtype = None else: pl_return_dtype = parse_into_dtype(return_dtype) warn_on_inefficient_map(function, columns=[self.name], map_target="series") return self._from_pyseries( self._s.map_elements( function, return_dtype=pl_return_dtype, skip_nulls=skip_nulls ) ) def shift(self, n: int = 1, *, fill_value: IntoExpr | None = None) -> Series: """ Shift values by the given number of indices. Parameters ---------- n Number of indices to shift forward. 
If a negative value is passed, values are shifted in the opposite direction instead. fill_value Fill the resulting null values with this value. Accepts scalar expression input. Non-expression inputs are parsed as literals. Notes ----- This method is similar to the `LAG` operation in SQL when the value for `n` is positive. With a negative value for `n`, it is similar to `LEAD`. Examples -------- By default, values are shifted forward by one index. >>> s = pl.Series([1, 2, 3, 4]) >>> s.shift() shape: (4,) Series: '' [i64] [ null 1 2 3 ] Pass a negative value to shift in the opposite direction instead. >>> s.shift(-2) shape: (4,) Series: '' [i64] [ 3 4 null null ] Specify `fill_value` to fill the resulting null values. >>> s.shift(-2, fill_value=100) shape: (4,) Series: '' [i64] [ 3 4 100 100 ] """ def zip_with(self, mask: Series, other: Series) -> Self: """ Take values from self or other based on the given mask. Where mask evaluates true, take values from self. Where mask evaluates false, take values from other. Parameters ---------- mask Boolean Series. other Series of same type. Returns ------- Series Examples -------- >>> s1 = pl.Series([1, 2, 3, 4, 5]) >>> s2 = pl.Series([5, 4, 3, 2, 1]) >>> s1.zip_with(s1 < s2, s2) shape: (5,) Series: '' [i64] [ 1 2 3 2 1 ] >>> mask = pl.Series([True, False, True, False, True]) >>> s1.zip_with(mask, s2) shape: (5,) Series: '' [i64] [ 1 4 3 2 5 ] """ require_same_type(self, other) return self._from_pyseries(self._s.zip_with(mask._s, other._s)) @unstable() def rolling_min_by( self, by: IntoExpr, window_size: timedelta | str, *, min_samples: int = 1, closed: ClosedInterval = "right", ) -> Self: """ Compute a rolling min based on another series. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed="right"` (the default) means the windows will be: - (t_0 - window_size, t_0] - (t_1 - window_size, t_1] - ... - (t_n - window_size, t_n] Parameters ---------- by Should be ``DateTime``, ``Date``, ``UInt64``, ``UInt32``, ``Int64``, or ``Int32`` data type (note that the integral ones require using `'i'` in `window_size`). window_size The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language: - 1ns (1 nanosecond) - 1us (1 microsecond) - 1ms (1 millisecond) - 1s (1 second) - 1m (1 minute) - 1h (1 hour) - 1d (1 calendar day) - 1w (1 calendar week) - 1mo (1 calendar month) - 1q (1 calendar quarter) - 1y (1 calendar year) - 1i (1 index count) By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". min_samples The number of values in the window that should be non-null before computing a result. closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. Notes ----- If you want to compute multiple aggregation statistics over the same dynamic window, consider using `rolling` - this method can cache the window size computation.
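As a minimal sketch of that approach (hypothetical column names, output omitted), several aggregations can share one set of temporal windows: >>> from datetime import datetime >>> df = pl.DataFrame( ... { ... "dt": pl.datetime_range( ... datetime(2001, 1, 1), datetime(2001, 1, 2), "1h", eager=True ... ), ... "x": range(25), ... } ... ) >>> df.rolling(index_column="dt", period="3h").agg( ... pl.col("x").min().alias("x_min"), ... pl.col("x").max().alias("x_max"), ... ) # doctest: +SKIP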
Examples -------- Create a series with a row index value: >>> from datetime import timedelta, datetime >>> start = datetime(2001, 1, 1) >>> stop = datetime(2001, 1, 2) >>> s = pl.Series("index", range(25)) >>> s shape: (25,) Series: 'index' [i64] [ 0 1 2 3 4 … 20 21 22 23 24 ] Create another series to apply the window mask: >>> d = pl.Series("date", pl.datetime_range(start, stop, "1h", eager=True)) >>> d shape: (25,) Series: 'date' [datetime[μs]] [ 2001-01-01 00:00:00 2001-01-01 01:00:00 2001-01-01 02:00:00 2001-01-01 03:00:00 2001-01-01 04:00:00 … 2001-01-01 20:00:00 2001-01-01 21:00:00 2001-01-01 22:00:00 2001-01-01 23:00:00 2001-01-02 00:00:00 ] Compute the rolling min with the temporal windows from the second series closed on the right: >>> s.rolling_min_by(d, "3h") shape: (25,) Series: 'index' [i64] [ 0 0 0 1 2 … 18 19 20 21 22 ] """ @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def rolling_min( self, window_size: int, weights: list[float] | None = None, *, min_samples: int | None = None, center: bool = False, ) -> Series: """ Apply a rolling min (moving min) over the values in this array. A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the `weights` vector. The resulting values will be aggregated to their min. The window at a given row will include the row itself and the `window_size - 1` elements before it. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`. Parameters ---------- window_size The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_samples The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window. Examples -------- >>> s = pl.Series("a", [100, 200, 300, 400, 500]) >>> s.rolling_min(window_size=3) shape: (5,) Series: 'a' [i64] [ null null 100 200 300 ] """ @unstable() def rolling_max_by( self, by: IntoExpr, window_size: timedelta | str, *, min_samples: int = 1, closed: ClosedInterval = "right", ) -> Self: """ Compute a rolling max based on another series. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed="right"` (the default) means the windows will be: - (t_0 - window_size, t_0] - (t_1 - window_size, t_1] - ... - (t_n - window_size, t_n] Parameters ---------- by Should be ``DateTime``, ``Date``, ``UInt64``, ``UInt32``, ``Int64``, or ``Int32`` data type (note that the integral ones require using `'i'` in `window_size`). window_size The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language: - 1ns (1 nanosecond) - 1us (1 microsecond) - 1ms (1 millisecond) - 1s (1 second) - 1m (1 minute) - 1h (1 hour) - 1d (1 calendar day) - 1w (1 calendar week) - 1mo (1 calendar month) - 1q (1 calendar quarter) - 1y (1 calendar year) - 1i (1 index count) By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". min_samples The number of values in the window that should be non-null before computing a result.
closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. Notes ----- If you want to compute multiple aggregation statistics over the same dynamic window, consider using `rolling` - this method can cache the window size computation. Examples -------- Create a series with a row index value: >>> from datetime import timedelta, datetime >>> start = datetime(2001, 1, 1) >>> stop = datetime(2001, 1, 2) >>> s = pl.Series("index", range(25)) >>> s shape: (25,) Series: 'index' [i64] [ 0 1 2 3 4 … 20 21 22 23 24 ] Create another series to apply the window mask: >>> d = pl.Series("date", pl.datetime_range(start, stop, "1h", eager=True)) >>> d shape: (25,) Series: 'date' [datetime[μs]] [ 2001-01-01 00:00:00 2001-01-01 01:00:00 2001-01-01 02:00:00 2001-01-01 03:00:00 2001-01-01 04:00:00 … 2001-01-01 20:00:00 2001-01-01 21:00:00 2001-01-01 22:00:00 2001-01-01 23:00:00 2001-01-02 00:00:00 ] Compute the rolling max with the temporal windows from the second series closed on the right: >>> s.rolling_max_by(d, "3h") shape: (25,) Series: 'index' [i64] [ 0 1 2 3 4 … 20 21 22 23 24 ] """ @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def rolling_max( self, window_size: int, weights: list[float] | None = None, *, min_samples: int | None = None, center: bool = False, ) -> Series: """ Apply a rolling max (moving max) over the values in this array. A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the `weights` vector. The resulting values will be aggregated to their max. The window at a given row will include the row itself and the `window_size - 1` elements before it. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`. Parameters ---------- window_size The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_samples The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window. Examples -------- >>> s = pl.Series("a", [100, 200, 300, 400, 500]) >>> s.rolling_max(window_size=2) shape: (5,) Series: 'a' [i64] [ null 200 300 400 500 ] """ @unstable() def rolling_mean_by( self, by: IntoExpr, window_size: timedelta | str, *, min_samples: int = 1, closed: ClosedInterval = "right", ) -> Self: """ Compute a rolling mean based on another series. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed="right"` (the default) means the windows will be: - (t_0 - window_size, t_0] - (t_1 - window_size, t_1] - ... - (t_n - window_size, t_n] Parameters ---------- by Should be ``DateTime``, ``Date``, ``UInt64``, ``UInt32``, ``Int64``, or ``Int32`` data type (note that the integral ones require using `'i'` in `window_size`). window_size The length of the window.
Can be a dynamic temporal size indicated by a timedelta or the following string language: - 1ns (1 nanosecond) - 1us (1 microsecond) - 1ms (1 millisecond) - 1s (1 second) - 1m (1 minute) - 1h (1 hour) - 1d (1 calendar day) - 1w (1 calendar week) - 1mo (1 calendar month) - 1q (1 calendar quarter) - 1y (1 calendar year) - 1i (1 index count) By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". min_samples The number of values in the window that should be non-null before computing a result. closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. Notes ----- If you want to compute multiple aggregation statistics over the same dynamic window, consider using `rolling` - this method can cache the window size computation. Examples -------- Create a series with a row index value: >>> from datetime import timedelta, datetime >>> start = datetime(2001, 1, 1) >>> stop = datetime(2001, 1, 2) >>> s = pl.Series("index", range(25)) >>> s shape: (25,) Series: 'index' [i64] [ 0 1 2 3 4 … 20 21 22 23 24 ] Create another series to apply the window mask: >>> d = pl.Series("date", pl.datetime_range(start, stop, "1h", eager=True)) >>> d shape: (25,) Series: 'date' [datetime[μs]] [ 2001-01-01 00:00:00 2001-01-01 01:00:00 2001-01-01 02:00:00 2001-01-01 03:00:00 2001-01-01 04:00:00 … 2001-01-01 20:00:00 2001-01-01 21:00:00 2001-01-01 22:00:00 2001-01-01 23:00:00 2001-01-02 00:00:00 ] Compute the rolling mean with the temporal windows from the second series closed on the right: >>> s.rolling_mean_by(d, "3h") shape: (25,) Series: 'index' [f64] [ 0.0 0.5 1.0 2.0 3.0 … 19.0 20.0 21.0 22.0 23.0 ] """ @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def rolling_mean( self, window_size: int, weights: list[float] | None = None, *, min_samples: int | None = None, center: bool = False, ) -> Series: """ Apply a rolling mean (moving mean) over the values in this array. A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the `weights` vector. The resulting values will be aggregated to their mean. The window at a given row will include the row itself and the `window_size - 1` elements before it. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`. Parameters ---------- window_size The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_samples The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window. Examples -------- >>> s = pl.Series("a", [100, 200, 300, 400, 500]) >>> s.rolling_mean(window_size=2) shape: (5,) Series: 'a' [f64] [ null 150.0 250.0 350.0 450.0 ] """ @unstable() def rolling_sum_by( self, by: IntoExpr, window_size: timedelta | str, *, min_samples: int = 1, closed: ClosedInterval = "right", ) -> Self: """ Compute a rolling sum based on another series. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change.
Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed="right"` (the default) means the windows will be: - (t_0 - window_size, t_0] - (t_1 - window_size, t_1] - ... - (t_n - window_size, t_n] Parameters ---------- by Should be ``DateTime``, ``Date``, ``UInt64``, ``UInt32``, ``Int64``, or ``Int32`` data type (note that the integral ones require using `'i'` in `window_size`). window_size The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language: - 1ns (1 nanosecond) - 1us (1 microsecond) - 1ms (1 millisecond) - 1s (1 second) - 1m (1 minute) - 1h (1 hour) - 1d (1 calendar day) - 1w (1 calendar week) - 1mo (1 calendar month) - 1q (1 calendar quarter) - 1y (1 calendar year) - 1i (1 index count) By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". min_samples The number of values in the window that should be non-null before computing a result. closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. Notes ----- If you want to compute multiple aggregation statistics over the same dynamic window, consider using `rolling` - this method can cache the window size computation. Examples -------- Create a series with a row index value: >>> from datetime import timedelta, datetime >>> start = datetime(2001, 1, 1) >>> stop = datetime(2001, 1, 2) >>> s = pl.Series("index", range(25)) >>> s shape: (25,) Series: 'index' [i64] [ 0 1 2 3 4 … 20 21 22 23 24 ] Create another series to apply the window mask: >>> d = pl.Series("date", pl.datetime_range(start, stop, "1h", eager=True)) >>> d shape: (25,) Series: 'date' [datetime[μs]] [ 2001-01-01 00:00:00 2001-01-01 01:00:00 2001-01-01 02:00:00 2001-01-01 03:00:00 2001-01-01 04:00:00 … 2001-01-01 20:00:00 2001-01-01 21:00:00 2001-01-01 22:00:00 2001-01-01 23:00:00 2001-01-02 00:00:00 ] Compute the rolling sum with the temporal windows from the second series closed on the right: >>> s.rolling_sum_by(d, "3h") shape: (25,) Series: 'index' [i64] [ 0 1 3 6 9 … 57 60 63 66 69 ] """ @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def rolling_sum( self, window_size: int, weights: list[float] | None = None, *, min_samples: int | None = None, center: bool = False, ) -> Series: """ Apply a rolling sum (moving sum) over the values in this array. A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the `weights` vector. The resulting values will be aggregated to their sum. The window at a given row will include the row itself and the `window_size - 1` elements before it. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`. Parameters ---------- window_size The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_samples The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window.
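As an illustrative sketch of the `weights` parameter (the values in each window are multiplied elementwise by the weights before aggregating, so the second window below yields 0.25 * 1 + 0.75 * 2 == 1.75): >>> pl.Series([1, 2, 3, 4]).rolling_sum( ... window_size=2, weights=[0.25, 0.75] ... ) # doctest: +SKIP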
Examples -------- >>> s = pl.Series("a", [1, 2, 3, 4, 5]) >>> s.rolling_sum(window_size=2) shape: (5,) Series: 'a' [i64] [ null 3 5 7 9 ] """ @unstable() def rolling_std_by( self, by: IntoExpr, window_size: timedelta | str, *, min_samples: int = 1, closed: ClosedInterval = "right", ddof: int = 1, ) -> Self: """ Compute a rolling standard deviation based on another series. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed="right"` (the default) means the windows will be: - (t_0 - window_size, t_0] - (t_1 - window_size, t_1] - ... - (t_n - window_size, t_n] Parameters ---------- by Should be ``DateTime``, ``Date``, ``UInt64``, ``UInt32``, ``Int64``, or ``Int32`` data type (note that the integral ones require using `'i'` in `window_size`). window_size The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language: - 1ns (1 nanosecond) - 1us (1 microsecond) - 1ms (1 millisecond) - 1s (1 second) - 1m (1 minute) - 1h (1 hour) - 1d (1 calendar day) - 1w (1 calendar week) - 1mo (1 calendar month) - 1q (1 calendar quarter) - 1y (1 calendar year) - 1i (1 index count) By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". min_samples The number of values in the window that should be non-null before computing a result. closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. ddof "Delta Degrees of Freedom": The divisor for a length N window is N - ddof. Notes ----- If you want to compute multiple aggregation statistics over the same dynamic window, consider using `rolling` - this method can cache the window size computation. Examples -------- Create a series with a row index value: >>> from datetime import timedelta, datetime >>> start = datetime(2001, 1, 1) >>> stop = datetime(2001, 1, 2) >>> s = pl.Series("index", range(25)) >>> s shape: (25,) Series: 'index' [i64] [ 0 1 2 3 4 … 20 21 22 23 24 ] Create another series to apply the window mask: >>> d = pl.Series("date", pl.datetime_range(start, stop, "1h", eager=True)) >>> d shape: (25,) Series: 'date' [datetime[μs]] [ 2001-01-01 00:00:00 2001-01-01 01:00:00 2001-01-01 02:00:00 2001-01-01 03:00:00 2001-01-01 04:00:00 … 2001-01-01 20:00:00 2001-01-01 21:00:00 2001-01-01 22:00:00 2001-01-01 23:00:00 2001-01-02 00:00:00 ] Compute the rolling std with the temporal windows from the second series closed on the right: >>> s.rolling_std_by(d, "3h") shape: (25,) Series: 'index' [f64] [ null 0.707107 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 ] """ @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def rolling_std( self, window_size: int, weights: list[float] | None = None, *, min_samples: int | None = None, center: bool = False, ddof: int = 1, ) -> Series: """ Compute a rolling std dev. A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the `weights` vector. The resulting values will be aggregated to their std dev. The window at a given row will include the row itself and the `window_size - 1` elements before it. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`.
Parameters ---------- window_size The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_samples The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window. ddof "Delta Degrees of Freedom": The divisor for a length N window is N - ddof. Examples -------- >>> s = pl.Series("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]) >>> s.rolling_std(window_size=3) shape: (6,) Series: 'a' [f64] [ null null 1.0 1.0 1.527525 2.0 ] """ @unstable() def rolling_var_by( self, by: IntoExpr, window_size: timedelta | str, *, min_samples: int = 1, closed: ClosedInterval = "right", ddof: int = 1, ) -> Self: """ Compute a rolling variance based on another series. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed="right"` (the default) means the windows will be: - (t_0 - window_size, t_0] - (t_1 - window_size, t_1] - ... - (t_n - window_size, t_n] Parameters ---------- by Should be ``DateTime``, ``Date``, ``UInt64``, ``UInt32``, ``Int64``, or ``Int32`` data type (note that the integral ones require using `'i'` in `window_size`). window_size The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language: - 1ns (1 nanosecond) - 1us (1 microsecond) - 1ms (1 millisecond) - 1s (1 second) - 1m (1 minute) - 1h (1 hour) - 1d (1 calendar day) - 1w (1 calendar week) - 1mo (1 calendar month) - 1q (1 calendar quarter) - 1y (1 calendar year) - 1i (1 index count) By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". min_samples The number of values in the window that should be non-null before computing a result. closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. ddof "Delta Degrees of Freedom": The divisor for a length N window is N - ddof. Notes ----- If you want to compute multiple aggregation statistics over the same dynamic window, consider using `rolling` - this method can cache the window size computation.
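As a small sketch of the `ddof` divisor on the fixed-size variant: for the window [1.0, 2.0, 4.0], `ddof=1` divides the summed squared deviations by 2 (giving ~2.333333), while `ddof=0` divides by 3 (giving ~1.555556). >>> pl.Series([1.0, 2.0, 4.0]).rolling_var(window_size=3, ddof=0) # doctest: +SKIP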
Examples -------- Create a series with a row index value: >>> from datetime import timedelta, datetime >>> start = datetime(2001, 1, 1) >>> stop = datetime(2001, 1, 2) >>> s = pl.Series("index", range(25)) >>> s shape: (25,) Series: 'index' [i64] [ 0 1 2 3 4 … 20 21 22 23 24 ] Create another series to apply the window mask: >>> d = pl.Series("date", pl.datetime_range(start, stop, "1h", eager=True)) >>> d shape: (25,) Series: 'date' [datetime[μs]] [ 2001-01-01 00:00:00 2001-01-01 01:00:00 2001-01-01 02:00:00 2001-01-01 03:00:00 2001-01-01 04:00:00 … 2001-01-01 20:00:00 2001-01-01 21:00:00 2001-01-01 22:00:00 2001-01-01 23:00:00 2001-01-02 00:00:00 ] Compute the rolling var with the temporal windows from the second series closed on the right: >>> s.rolling_var_by(d, "3h") shape: (25,) Series: 'index' [f64] [ null 0.5 1.0 1.0 1.0 … 1.0 1.0 1.0 1.0 1.0 ] """ @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def rolling_var( self, window_size: int, weights: list[float] | None = None, *, min_samples: int | None = None, center: bool = False, ddof: int = 1, ) -> Series: """ Compute a rolling variance. A window of length `window_size` will traverse the array. The values that fill this window will (optionally) be multiplied with the weights given by the `weights` vector. The resulting values will be aggregated to their variance. The window at a given row will include the row itself and the `window_size - 1` elements before it. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`. Parameters ---------- window_size The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_samples The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window. ddof "Delta Degrees of Freedom": The divisor for a length N window is N - ddof. Examples -------- >>> s = pl.Series("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]) >>> s.rolling_var(window_size=3) shape: (6,) Series: 'a' [f64] [ null null 1.0 1.0 2.333333 4.0 ] """ @unstable() @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def rolling_map( self, function: Callable[[Series], Any], window_size: int, weights: list[float] | None = None, *, min_samples: int | None = None, center: bool = False, ) -> Series: """ Compute a custom rolling window function. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`. Parameters ---------- function Custom aggregation function. window_size The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_samples The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window. Warnings -------- Computing custom functions is extremely slow. Use specialized rolling functions such as :func:`Series.rolling_sum` if at all possible.
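As a sketch of the preferred native form of the example below (assuming NaN values should be ignored as in `nansum`, which filling NaN with 0.0 reproduces for fully-populated windows): >>> s = pl.Series([11.0, 2.0, 9.0, float("nan"), 8.0]) >>> s.fill_nan(0.0).rolling_sum(window_size=3) # doctest: +SKIP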
Examples -------- >>> from numpy import nansum >>> s = pl.Series([11.0, 2.0, 9.0, float("nan"), 8.0]) >>> s.rolling_map(nansum, window_size=3) shape: (5,) Series: '' [f64] [ null null 22.0 11.0 17.0 ] """ @unstable() def rolling_median_by( self, by: IntoExpr, window_size: timedelta | str, *, min_samples: int = 1, closed: ClosedInterval = "right", ) -> Self: """ Compute a rolling median based on another series. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed="right"` (the default) means the windows will be: - (t_0 - window_size, t_0] - (t_1 - window_size, t_1] - ... - (t_n - window_size, t_n] Parameters ---------- by Should be ``DateTime``, ``Date``, ``UInt64``, ``UInt32``, ``Int64``, or ``Int32`` data type (note that the integral ones require using `'i'` in `window_size`). window_size The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language: - 1ns (1 nanosecond) - 1us (1 microsecond) - 1ms (1 millisecond) - 1s (1 second) - 1m (1 minute) - 1h (1 hour) - 1d (1 calendar day) - 1w (1 calendar week) - 1mo (1 calendar month) - 1q (1 calendar quarter) - 1y (1 calendar year) - 1i (1 index count) By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". min_samples The number of values in the window that should be non-null before computing a result. closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. Notes ----- If you want to compute multiple aggregation statistics over the same dynamic window, consider using `rolling` - this method can cache the window size computation. Examples -------- Create a series with a row index value: >>> from datetime import timedelta, datetime >>> start = datetime(2001, 1, 1) >>> stop = datetime(2001, 1, 2) >>> s = pl.Series("index", range(25)) >>> s shape: (25,) Series: 'index' [i64] [ 0 1 2 3 4 … 20 21 22 23 24 ] Create another series to apply the window mask: >>> d = pl.Series("date", pl.datetime_range(start, stop, "1h", eager=True)) >>> d shape: (25,) Series: 'date' [datetime[μs]] [ 2001-01-01 00:00:00 2001-01-01 01:00:00 2001-01-01 02:00:00 2001-01-01 03:00:00 2001-01-01 04:00:00 … 2001-01-01 20:00:00 2001-01-01 21:00:00 2001-01-01 22:00:00 2001-01-01 23:00:00 2001-01-02 00:00:00 ] Compute the rolling median with the temporal windows from the second series closed on the right: >>> s.rolling_median_by(d, "3h") shape: (25,) Series: 'index' [f64] [ 0.0 0.5 1.0 2.0 3.0 … 19.0 20.0 21.0 22.0 23.0 ] """ @unstable() @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def rolling_median( self, window_size: int, weights: list[float] | None = None, *, min_samples: int | None = None, center: bool = False, ) -> Series: """ Compute a rolling median. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. The window at a given row will include the row itself and the `window_size - 1` elements before it. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`. Parameters ---------- window_size The length of the window in number of elements.
weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_samples The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window. Examples -------- >>> s = pl.Series("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]) >>> s.rolling_median(window_size=3) shape: (6,) Series: 'a' [f64] [ null null 2.0 3.0 4.0 6.0 ] """ @unstable() def rolling_quantile_by( self, by: IntoExpr, window_size: timedelta | str, *, quantile: float, interpolation: QuantileMethod = "nearest", min_samples: int = 1, closed: ClosedInterval = "right", ) -> Self: """ Compute a rolling quantile based on another series. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed="right"` (the default) means the windows will be: - (t_0 - window_size, t_0] - (t_1 - window_size, t_1] - ... - (t_n - window_size, t_n] Parameters ---------- by Should be ``DateTime``, ``Date``, ``UInt64``, ``UInt32``, ``Int64``, or ``Int32`` data type (note that the integral ones require using `'i'` in `window_size`). quantile Quantile between 0.0 and 1.0. interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable'} Interpolation method. window_size The length of the window. Can be a dynamic temporal size indicated by a timedelta or the following string language: - 1ns (1 nanosecond) - 1us (1 microsecond) - 1ms (1 millisecond) - 1s (1 second) - 1m (1 minute) - 1h (1 hour) - 1d (1 calendar day) - 1w (1 calendar week) - 1mo (1 calendar month) - 1q (1 calendar quarter) - 1y (1 calendar year) - 1i (1 index count) By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". min_samples The number of values in the window that should be non-null before computing a result. closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. Notes ----- If you want to compute multiple aggregation statistics over the same dynamic window, consider using `rolling` - this method can cache the window size computation.
Examples -------- Create a series with a row index value: >>> from datetime import timedelta, datetime >>> start = datetime(2001, 1, 1) >>> stop = datetime(2001, 1, 2) >>> s = pl.Series("index", range(25)) >>> s shape: (25,) Series: 'index' [i64] [ 0 1 2 3 4 … 20 21 22 23 24 ] Create another series to apply the window mask: >>> d = pl.Series("date", pl.datetime_range(start, stop, "1h", eager=True)) >>> d shape: (25,) Series: 'date' [datetime[μs]] [ 2001-01-01 00:00:00 2001-01-01 01:00:00 2001-01-01 02:00:00 2001-01-01 03:00:00 2001-01-01 04:00:00 … 2001-01-01 20:00:00 2001-01-01 21:00:00 2001-01-01 22:00:00 2001-01-01 23:00:00 2001-01-02 00:00:00 ] Compute the rolling quantile with the temporal windows from the second series closed on the right: >>> s.rolling_quantile_by(d, "3h", quantile=0.5) shape: (25,) Series: 'index' [f64] [ 0.0 1.0 1.0 2.0 3.0 … 19.0 20.0 21.0 22.0 23.0 ] """ # noqa: W505 @unstable() @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def rolling_quantile( self, quantile: float, interpolation: QuantileMethod = "nearest", window_size: int = 2, weights: list[float] | None = None, *, min_samples: int | None = None, center: bool = False, ) -> Series: """ Compute a rolling quantile. The window at a given row will include the row itself and the `window_size - 1` elements before it. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`. Parameters ---------- quantile Quantile between 0.0 and 1.0. interpolation : {'nearest', 'higher', 'lower', 'midpoint', 'linear', 'equiprobable'} Interpolation method. window_size The length of the window in number of elements. weights An optional slice with the same length as the window that will be multiplied elementwise with the values in the window. min_samples The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window. Examples -------- >>> s = pl.Series("a", [1.0, 2.0, 3.0, 4.0, 6.0, 8.0]) >>> s.rolling_quantile(quantile=0.33, window_size=3) shape: (6,) Series: 'a' [f64] [ null null 2.0 3.0 4.0 6.0 ] >>> s.rolling_quantile(quantile=0.33, interpolation="linear", window_size=3) shape: (6,) Series: 'a' [f64] [ null null 1.66 2.66 3.66 5.32 ] """ # noqa: W505 @unstable() def rolling_rank_by( self, by: IntoExpr, window_size: timedelta | str, method: RankMethod = "average", *, seed: int | None = None, min_samples: int = 1, closed: ClosedInterval = "right", ) -> Series: """ Compute a rolling rank based on another column. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. Given a `by` column `<t_0, t_1, ..., t_n>`, then `closed="right"` (the default) means the windows will be: - (t_0 - window_size, t_0] - (t_1 - window_size, t_1] - ... - (t_n - window_size, t_n] Parameters ---------- by Should be ``DateTime``, ``Date``, ``UInt64``, ``UInt32``, ``Int64``, or ``Int32`` data type (note that the integral ones require using `'i'` in `window_size`). window_size The length of the window.
Can be a dynamic temporal size indicated by a timedelta or the following string language: - 1ns (1 nanosecond) - 1us (1 microsecond) - 1ms (1 millisecond) - 1s (1 second) - 1m (1 minute) - 1h (1 hour) - 1d (1 calendar day) - 1w (1 calendar week) - 1mo (1 calendar month) - 1q (1 calendar quarter) - 1y (1 calendar year) - 1i (1 index count) By "calendar day", we mean the corresponding time on the next day (which may not be 24 hours, due to daylight savings). Similarly for "calendar week", "calendar month", "calendar quarter", and "calendar year". method : {'average', 'min', 'max', 'dense', 'random'} The method used to assign ranks to tied elements. The following methods are available (default is 'average'): - 'average' : The average of the ranks that would have been assigned to all the tied values is assigned to each value. - 'min' : The minimum of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.) - 'max' : The maximum of the ranks that would have been assigned to all the tied values is assigned to each value. - 'dense' : Like 'min', but the rank of the next highest element is assigned the rank immediately after those assigned to the tied elements. - 'random' : Choose a random rank for each value in a tie. seed Random seed used when `method='random'`. If set to None (default), a random seed is generated for each rolling rank operation. min_samples The number of values in the window that should be non-null before computing a result. closed : {'left', 'right', 'both', 'none'} Define which sides of the temporal interval are closed (inclusive), defaults to `'right'`. Returns ------- Series A Series of dtype :class:`.Float64` if `method` is `"average"`, or of the index data type (see :func:`.get_index_type`) otherwise. """ @unstable() def rolling_rank( self, window_size: int, method: RankMethod = "average", *, seed: int | None = None, min_samples: int | None = None, center: bool = False, ) -> Series: """ Compute a rolling rank. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. A window of length `window_size` will traverse the array. The values that fill this window will be ranked according to the `method` parameter. The resulting values will be the rank of the value that is at the end of the sliding window. Parameters ---------- window_size Integer size of the rolling window. method : {'average', 'min', 'max', 'dense', 'random'} The method used to assign ranks to tied elements. The following methods are available (default is 'average'): - 'average' : The average of the ranks that would have been assigned to all the tied values is assigned to each value. - 'min' : The minimum of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.) - 'max' : The maximum of the ranks that would have been assigned to all the tied values is assigned to each value. - 'dense' : Like 'min', but the rank of the next highest element is assigned the rank immediately after those assigned to the tied elements. - 'random' : Choose a random rank for each value in a tie. seed Random seed used when `method='random'`. If set to None (default), a random seed is generated for each rolling rank operation. min_samples The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`.
center Set the labels at the center of the window. Returns ------- Series A Series of dtype :class:`.Float64` if `method` is `"average"`, or of the index data type (see :func:`.get_index_type`) otherwise. Examples -------- >>> pl.Series([1, 4, 4, 1, 9]).rolling_rank(3, method="average") shape: (5,) Series: '' [f64] [ null null 2.5 1.0 3.0 ] """ @unstable() def rolling_skew( self, window_size: int, *, bias: bool = True, min_samples: int | None = None, center: bool = False, ) -> Series: """ Compute a rolling skew. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. The window at a given row includes the row itself and the `window_size - 1` elements before it. Parameters ---------- window_size Integer size of the rolling window. bias If False, the calculations are corrected for statistical bias. min_samples The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window. See Also -------- Series.skew Examples -------- >>> pl.Series([1, 4, 2, 9]).rolling_skew(3) shape: (4,) Series: '' [f64] [ null null 0.381802 0.47033 ] Note how the values match: >>> pl.Series([1, 4, 2]).skew(), pl.Series([4, 2, 9]).skew() (0.38180177416060584, 0.47033046033698594) """ @unstable() def rolling_kurtosis( self, window_size: int, *, fisher: bool = True, bias: bool = True, min_samples: int | None = None, center: bool = False, ) -> Series: """ Compute a rolling kurtosis. .. warning:: This functionality is considered **unstable**. It may be changed at any point without it being considered a breaking change. The window at a given row will include the row itself, and the `window_size - 1` elements before it. Parameters ---------- window_size Integer size of the rolling window. fisher : bool, optional If True, Fisher's definition is used (normal ==> 0.0). If False, Pearson's definition is used (normal ==> 3.0). bias : bool, optional If False, the calculations are corrected for statistical bias. min_samples The number of values in the window that should be non-null before computing a result. If set to `None` (default), it will be set equal to `window_size`. center Set the labels at the center of the window. See Also -------- Series.kurtosis Examples -------- >>> pl.Series([1, 4, 2, 9]).rolling_kurtosis(3) shape: (4,) Series: '' [f64] [ null null -1.5 -1.5 ] """ def sample( self, n: int | None = None, *, fraction: float | None = None, with_replacement: bool = False, shuffle: bool = False, seed: int | None = None, ) -> Series: """ Sample from this Series. Parameters ---------- n Number of items to return. Cannot be used with `fraction`. Defaults to 1 if `fraction` is None. fraction Fraction of items to return. Cannot be used with `n`. with_replacement Allow values to be sampled more than once. shuffle Shuffle the order of sampled data points. seed Seed for the random number generator. If set to None (default), a random seed is generated for each sample operation. Examples -------- >>> s = pl.Series("a", [1, 2, 3, 4, 5]) >>> s.sample(2, seed=0) # doctest: +IGNORE_RESULT shape: (2,) Series: 'a' [i64] [ 1 5 ] """ def peak_max(self) -> Self: """ Get a boolean mask of the local maximum peaks. Examples -------- >>> s = pl.Series("a", [1, 2, 3, 4, 5]) >>> s.peak_max() shape: (5,) Series: 'a' [bool] [ false false false false true ] """ def peak_min(self) -> Self: """ Get a boolean mask of the local minimum peaks.
Examples -------- >>> s = pl.Series("a", [4, 1, 3, 2, 5]) >>> s.peak_min() shape: (5,) Series: 'a' [bool] [ false true false true false ] """ def n_unique(self) -> int: """ Count the number of unique values in this Series. Examples -------- >>> s = pl.Series("a", [1, 2, 2, 3]) >>> s.n_unique() 3 """ return self._s.n_unique() def shrink_to_fit(self, *, in_place: bool = False) -> Series: """ Shrink Series memory usage. Shrinks the underlying array capacity to exactly fit the actual data. (Note that this function does not change the Series data type). """ if in_place: self._s.shrink_to_fit() return self else: series = self.clone() series._s.shrink_to_fit() return series def hash( self, seed: int = 0, seed_1: int | None = None, seed_2: int | None = None, seed_3: int | None = None, ) -> Series: """ Hash the Series. The hash value is of type `UInt64`. Parameters ---------- seed Random seed parameter. Defaults to 0. seed_1 Random seed parameter. Defaults to `seed` if not set. seed_2 Random seed parameter. Defaults to `seed` if not set. seed_3 Random seed parameter. Defaults to `seed` if not set. Notes ----- This implementation of `hash` does not guarantee stable results across different Polars versions. Its stability is only guaranteed within a single version. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.hash(seed=42) # doctest: +IGNORE_RESULT shape: (3,) Series: 'a' [u64] [ 10734580197236529959 3022416320763508302 13756996518000038261 ] """ def reinterpret(self, *, signed: bool = True) -> Series: """ Reinterpret the underlying bits as a signed/unsigned integer. This operation is only allowed for 64-bit integers. For integers with fewer bits, you can safely use the cast operation. Parameters ---------- signed If True, reinterpret as `pl.Int64`. Otherwise, reinterpret as `pl.UInt64`. Examples -------- >>> s = pl.Series("a", [-(2**60), -2, 3]) >>> s shape: (3,) Series: 'a' [i64] [ -1152921504606846976 -2 3 ] >>> s.reinterpret(signed=False) shape: (3,) Series: 'a' [u64] [ 17293822569102704640 18446744073709551614 3 ] """ def interpolate(self, method: InterpolationMethod = "linear") -> Series: """ Interpolate intermediate values. Nulls at the beginning and end of the series remain null. Parameters ---------- method : {'linear', 'nearest'} Interpolation method. Examples -------- >>> s = pl.Series("a", [1, 2, None, None, 5]) >>> s.interpolate() shape: (5,) Series: 'a' [f64] [ 1.0 2.0 3.0 4.0 5.0 ] """ def interpolate_by(self, by: IntoExpr) -> Series: """ Interpolate intermediate values with x-coordinate based on another column. Nulls at the beginning and end of the series remain null. Parameters ---------- by Column to interpolate values based on. Examples -------- Fill null values using linear interpolation. >>> s = pl.Series([1, None, None, 3]) >>> by = pl.Series([1, 2, 7, 8]) >>> s.interpolate_by(by) shape: (4,) Series: '' [f64] [ 1.0 1.285714 2.714286 3.0 ] """ def abs(self) -> Series: """ Compute absolute values. Same as `abs(series)`. Examples -------- >>> s = pl.Series([1, -2, -3]) >>> s.abs() shape: (3,) Series: '' [i64] [ 1 2 3 ] """ def rank( self, method: RankMethod = "average", *, descending: bool = False, seed: int | None = None, ) -> Series: """ Assign ranks to data, dealing with ties appropriately. Parameters ---------- method : {'average', 'min', 'max', 'dense', 'ordinal', 'random'} The method used to assign ranks to tied elements.
The following methods are available (default is 'average'): - 'average' : The average of the ranks that would have been assigned to all the tied values is assigned to each value. - 'min' : The minimum of the ranks that would have been assigned to all the tied values is assigned to each value. (This is also referred to as "competition" ranking.) - 'max' : The maximum of the ranks that would have been assigned to all the tied values is assigned to each value. - 'dense' : Like 'min', but the rank of the next highest element is assigned the rank immediately after those assigned to the tied elements. - 'ordinal' : All values are given a distinct rank, corresponding to the order that the values occur in the Series. - 'random' : Like 'ordinal', but the rank for ties is not dependent on the order that the values occur in the Series. descending Rank in descending order. seed If `method="random"`, use this as seed. Examples -------- The 'average' method: >>> s = pl.Series("a", [3, 6, 1, 1, 6]) >>> s.rank() shape: (5,) Series: 'a' [f64] [ 3.0 4.5 1.5 1.5 4.5 ] The 'ordinal' method: >>> s = pl.Series("a", [3, 6, 1, 1, 6]) >>> s.rank("ordinal") shape: (5,) Series: 'a' [u32] [ 3 4 1 2 5 ] """ def diff(self, n: int = 1, null_behavior: NullBehavior = "ignore") -> Series: """ Calculate the first discrete difference between shifted items. Parameters ---------- n Number of slots to shift. null_behavior : {'ignore', 'drop'} How to handle null values. Examples -------- >>> s = pl.Series("s", values=[20, 10, 30, 25, 35], dtype=pl.Int8) >>> s.diff() shape: (5,) Series: 's' [i8] [ null -10 20 -5 10 ] >>> s.diff(n=2) shape: (5,) Series: 's' [i8] [ null null 10 15 5 ] >>> s.diff(n=2, null_behavior="drop") shape: (3,) Series: 's' [i8] [ 10 15 5 ] """ def pct_change(self, n: int | IntoExprColumn = 1) -> Series: """ Computes percentage change between values. Percentage change (as fraction) between current element and most-recent non-null element at least `n` period(s) before the current element. Computes the change from the previous row by default. Parameters ---------- n periods to shift for forming percent change. Notes ----- Null values are preserved. If you're coming from pandas, this matches their ``fill_method=None`` behaviour. Examples -------- >>> pl.Series(range(10)).pct_change() shape: (10,) Series: '' [f64] [ null inf 1.0 0.5 0.333333 0.25 0.2 0.166667 0.142857 0.125 ] >>> pl.Series([1, 2, 4, 8, 16, 32, 64, 128, 256, 512]).pct_change(2) shape: (10,) Series: '' [f64] [ null null 3.0 3.0 3.0 3.0 3.0 3.0 3.0 3.0 ] """ def skew(self, *, bias: bool = True) -> float | None: r""" Compute the sample skewness of a data set. For normally distributed data, the skewness should be about zero. For unimodal continuous distributions, a skewness value greater than zero means that there is more weight in the right tail of the distribution. The function `skewtest` can be used to determine if the skewness value is close enough to zero, statistically speaking. See scipy.stats for more information. Parameters ---------- bias : bool, optional If False, the calculations are corrected for statistical bias. Notes ----- The sample skewness is computed as the Fisher-Pearson coefficient of skewness, i.e. .. math:: g_1=\frac{m_3}{m_2^{3/2}} where .. math:: m_i=\frac{1}{N}\sum_{n=1}^N(x[n]-\bar{x})^i is the biased sample :math:`i\texttt{th}` central moment, and :math:`\bar{x}` is the sample mean. 
If `bias` is False, the calculations are corrected for bias and the value computed is the adjusted Fisher-Pearson standardized moment coefficient, i.e. .. math:: G_1 = \frac{k_3}{k_2^{3/2}} = \frac{\sqrt{N(N-1)}}{N-2}\frac{m_3}{m_2^{3/2}} Examples -------- >>> s = pl.Series([1, 2, 2, 4, 5]) >>> s.skew() 0.34776706224699483 """ return self._s.skew(bias) def kurtosis(self, *, fisher: bool = True, bias: bool = True) -> float | None: """ Compute the kurtosis (Fisher or Pearson) of a dataset. Kurtosis is the fourth central moment divided by the square of the variance. If Fisher's definition is used, then 3.0 is subtracted from the result to give 0.0 for a normal distribution. If bias is False, then the kurtosis is calculated using k statistics to eliminate bias coming from biased moment estimators. See scipy.stats for more information. Parameters ---------- fisher : bool, optional If True, Fisher's definition is used (normal ==> 0.0). If False, Pearson's definition is used (normal ==> 3.0). bias : bool, optional If False, the calculations are corrected for statistical bias. Examples -------- >>> s = pl.Series("grades", [66, 79, 54, 97, 96, 70, 69, 85, 93, 75]) >>> s.kurtosis() -1.0522623626787952 >>> s.kurtosis(fisher=False) 1.9477376373212048 >>> s.kurtosis(fisher=False, bias=False) 2.1040361802642717 """ return self._s.kurtosis(fisher, bias) def clip( self, lower_bound: NumericLiteral | TemporalLiteral | IntoExprColumn | None = None, upper_bound: NumericLiteral | TemporalLiteral | IntoExprColumn | None = None, ) -> Series: """ Set values outside the given boundaries to the boundary value. Parameters ---------- lower_bound Lower bound. Accepts expression input. Non-expression inputs are parsed as literals. If set to `None` (default), no lower bound is applied. upper_bound Upper bound. Accepts expression input. Non-expression inputs are parsed as literals. If set to `None` (default), no upper bound is applied. See Also -------- when Notes ----- This method only works for numeric and temporal columns. To clip other data types, consider writing a `when-then-otherwise` expression. See :func:`when`. Examples -------- Specifying both a lower and upper bound: >>> s = pl.Series([-50, 5, 50, None]) >>> s.clip(1, 10) shape: (4,) Series: '' [i64] [ 1 5 10 null ] Specifying only a single bound: >>> s.clip(upper_bound=10) shape: (4,) Series: '' [i64] [ -50 5 10 null ] """ def lower_bound(self) -> Self: """ Return the lower bound of this Series' dtype as a unit Series. See Also -------- upper_bound : return the upper bound of the given Series' dtype. Examples -------- >>> s = pl.Series("s", [-1, 0, 1], dtype=pl.Int32) >>> s.lower_bound() shape: (1,) Series: 's' [i32] [ -2147483648 ] >>> s = pl.Series("s", [1.0, 2.5, 3.0], dtype=pl.Float32) >>> s.lower_bound() shape: (1,) Series: 's' [f32] [ -inf ] """ def upper_bound(self) -> Self: """ Return the upper bound of this Series' dtype as a unit Series. See Also -------- lower_bound : return the lower bound of the given Series' dtype. Examples -------- >>> s = pl.Series("s", [-1, 0, 1], dtype=pl.Int8) >>> s.upper_bound() shape: (1,) Series: 's' [i8] [ 127 ] >>> s = pl.Series("s", [1.0, 2.5, 3.0], dtype=pl.Float64) >>> s.upper_bound() shape: (1,) Series: 's' [f64] [ inf ] """ def replace( self, old: IntoExpr | Sequence[Any] | Mapping[Any, Any], new: IntoExpr | Sequence[Any] | NoDefault = no_default, *, default: IntoExpr | NoDefault = no_default, return_dtype: PolarsDataType | None = None, ) -> Self: """ Replace values by different values of the same data type.
Parameters ---------- old Value or sequence of values to replace. Also accepts a mapping of values to their replacement as syntactic sugar for `replace(old=Series(mapping.keys()), new=Series(mapping.values()))`. new Value or sequence of values to replace by. Length must match the length of `old` or have length 1. default Set values that were not replaced to this value. Defaults to keeping the original value. Accepts expression input. Non-expression inputs are parsed as literals. .. deprecated:: 0.20.31 Use :meth:`replace_strict` instead to set a default while replacing values. return_dtype The data type of the resulting expression. If set to `None` (default), the data type is determined automatically based on the other inputs. .. deprecated:: 0.20.31 Use :meth:`replace_strict` instead to set a return data type while replacing values. See Also -------- replace_strict str.replace Notes ----- The global string cache must be enabled when replacing categorical values. Examples -------- Replace a single value by another value. Values that were not replaced remain unchanged. >>> s = pl.Series([1, 2, 2, 3]) >>> s.replace(2, 100) shape: (4,) Series: '' [i64] [ 1 100 100 3 ] Replace multiple values by passing sequences to the `old` and `new` parameters. >>> s.replace([2, 3], [100, 200]) shape: (4,) Series: '' [i64] [ 1 100 100 200 ] Passing a mapping with replacements is also supported as syntactic sugar. >>> mapping = {2: 100, 3: 200} >>> s.replace(mapping) shape: (4,) Series: '' [i64] [ 1 100 100 200 ] The original data type is preserved when replacing by values of a different data type. Use :meth:`replace_strict` to replace and change the return data type. >>> s = pl.Series(["x", "y", "z"]) >>> mapping = {"x": 1, "y": 2, "z": 3} >>> s.replace(mapping) shape: (3,) Series: '' [str] [ "1" "2" "3" ] """ def replace_strict( self, old: IntoExpr | Sequence[Any] | Mapping[Any, Any], new: IntoExpr | Sequence[Any] | NoDefault = no_default, *, default: IntoExpr | NoDefault = no_default, return_dtype: PolarsDataType | None = None, ) -> Self: """ Replace all values by different values. Parameters ---------- old Value or sequence of values to replace. Also accepts a mapping of values to their replacement as syntactic sugar for `replace_strict(old=Series(mapping.keys()), new=Series(mapping.values()))`. new Value or sequence of values to replace by. Length must match the length of `old` or have length 1. default Set values that were not replaced to this value. If no default is specified (the default), an error is raised if any values were not replaced. Accepts expression input. Non-expression inputs are parsed as literals. return_dtype The data type of the resulting Series. If set to `None` (default), the data type is determined automatically based on the other inputs. Raises ------ InvalidOperationError If any non-null values in the original column were not replaced, and no `default` was specified. See Also -------- replace str.replace Notes ----- The global string cache must be enabled when replacing categorical values. Examples -------- Replace values by passing sequences to the `old` and `new` parameters. >>> s = pl.Series([1, 2, 2, 3]) >>> s.replace_strict([1, 2, 3], [100, 200, 300]) shape: (4,) Series: '' [i64] [ 100 200 200 300 ] Passing a mapping with replacements is also supported as syntactic sugar. >>> mapping = {1: 100, 2: 200, 3: 300} >>> s.replace_strict(mapping) shape: (4,) Series: '' [i64] [ 100 200 200 300 ] By default, an error is raised if any non-null values were not replaced.
Specify a default to set all values that were not matched. >>> mapping = {2: 200, 3: 300} >>> s.replace_strict(mapping) # doctest: +SKIP Traceback (most recent call last): ... polars.exceptions.InvalidOperationError: incomplete mapping specified for `replace_strict` >>> s.replace_strict(mapping, default=-1) shape: (4,) Series: '' [i64] [ -1 200 200 300 ] The default can be another Series. >>> default = pl.Series([2.5, 5.0, 7.5, 10.0]) >>> s.replace_strict(2, 200, default=default) shape: (4,) Series: '' [f64] [ 2.5 200.0 200.0 10.0 ] Replacing by values of a different data type sets the return type based on a combination of the `new` data type and the `default` data type. >>> s = pl.Series(["x", "y", "z"]) >>> mapping = {"x": 1, "y": 2, "z": 3} >>> s.replace_strict(mapping) shape: (3,) Series: '' [i64] [ 1 2 3 ] >>> s.replace_strict(mapping, default="x") shape: (3,) Series: '' [str] [ "1" "2" "3" ] Set the `return_dtype` parameter to control the resulting data type directly. >>> s.replace_strict(mapping, return_dtype=pl.UInt8) shape: (3,) Series: '' [u8] [ 1 2 3 ] """ # noqa: W505 def reshape(self, dimensions: tuple[int, ...]) -> Series: """ Reshape this Series to a flat Series or an Array Series. Parameters ---------- dimensions Tuple of the dimension sizes. If a -1 is used in any of the dimensions, that dimension is inferred. Returns ------- Series If a single dimension is given, results in a Series of the original data type. If multiple dimensions are given, results in a Series of data type :class:`Array` with shape `dimensions`. See Also -------- Series.list.explode : Explode a list column. Examples -------- >>> s = pl.Series("foo", [1, 2, 3, 4, 5, 6, 7, 8, 9]) >>> square = s.reshape((3, 3)) >>> square shape: (3,) Series: 'foo' [array[i64, 3]] [ [1, 2, 3] [4, 5, 6] [7, 8, 9] ] >>> square.reshape((9,)) shape: (9,) Series: 'foo' [i64] [ 1 2 3 4 5 6 7 8 9 ] """ return self._from_pyseries(self._s.reshape(dimensions)) def shuffle(self, seed: int | None = None) -> Series: """ Shuffle the contents of this Series. Parameters ---------- seed Seed for the random number generator. If set to None (default), a random seed is generated each time the shuffle is called. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.shuffle(seed=1) shape: (3,) Series: 'a' [i64] [ 2 3 1 ] """ @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def ewm_mean( self, *, com: float | None = None, span: float | None = None, half_life: float | None = None, alpha: float | None = None, adjust: bool = True, min_samples: int = 1, ignore_nulls: bool = False, ) -> Series: r""" Compute exponentially-weighted moving average. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`. Parameters ---------- com Specify decay in terms of center of mass, :math:`\gamma`, with .. math:: \alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0 span Specify decay in terms of span, :math:`\theta`, with .. math:: \alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1 half_life Specify decay in terms of half-life, :math:`\tau`, with .. math:: \alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \tau } \right\} \; \forall \; \tau > 0 alpha Specify smoothing factor alpha directly, :math:`0 < \alpha \leq 1`. adjust Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings - When `adjust=True` (the default) the EW function is calculated using weights :math:`w_i = (1 - \alpha)^i` - When `adjust=False` the EW function is calculated recursively by ..
math:: y_0 &= x_0 \\ y_t &= (1 - \alpha)y_{t - 1} + \alpha x_t min_samples Minimum number of observations in window required to have a value (otherwise result is null). ignore_nulls Ignore missing values when calculating weights. - When `ignore_nulls=False` (default), weights are based on absolute positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of [:math:`x_0`, None, :math:`x_2`] are :math:`(1-\alpha)^2` and :math:`1` if `adjust=True`, and :math:`(1-\alpha)^2` and :math:`\alpha` if `adjust=False`. - When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if `adjust=True`, and :math:`1-\alpha` and :math:`\alpha` if `adjust=False`. Examples -------- >>> s = pl.Series([1, 2, 3]) >>> s.ewm_mean(com=1, ignore_nulls=False) shape: (3,) Series: '' [f64] [ 1.0 1.666667 2.428571 ] """ def ewm_mean_by( self, by: IntoExpr, *, half_life: str | timedelta, ) -> Series: r""" Compute time-based exponentially weighted moving average. Given observations :math:`x_0, x_1, \ldots, x_{n-1}` at times :math:`t_0, t_1, \ldots, t_{n-1}`, the EWMA is calculated as .. math:: y_0 &= x_0 \\ \alpha_i &= 1 - \exp \left\{ \frac{ -\ln(2)(t_i-t_{i-1}) }{ \tau } \right\} \\ y_i &= \alpha_i x_i + (1 - \alpha_i) y_{i-1}; \quad i > 0 where :math:`\tau` is the `half_life`. Parameters ---------- by Times to calculate average by. Should be ``DateTime``, ``Date``, ``UInt64``, ``UInt32``, ``Int64``, or ``Int32`` data type. half_life Unit over which observation decays to half its value. Can be created either from a timedelta, or by using the following string language: - 1ns (1 nanosecond) - 1us (1 microsecond) - 1ms (1 millisecond) - 1s (1 second) - 1m (1 minute) - 1h (1 hour) - 1d (1 day) - 1w (1 week) - 1i (1 index count) Or combine them: "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds Note that `half_life` is treated as a constant duration - calendar durations such as months (or even days in the time-zone-aware case) are not supported, please express your duration in an approximately equivalent number of hours (e.g. '370h' instead of '1mo'). Returns ------- Series Float32 if input is Float32, otherwise Float64. Examples -------- >>> from datetime import date, timedelta >>> df = pl.DataFrame( ... { ... "values": [0, 1, 2, None, 4], ... "times": [ ... date(2020, 1, 1), ... date(2020, 1, 3), ... date(2020, 1, 10), ... date(2020, 1, 15), ... date(2020, 1, 17), ... ], ... } ... ).sort("times") >>> df["values"].ewm_mean_by(df["times"], half_life="4d") shape: (5,) Series: 'values' [f64] [ 0.0 0.292893 1.492474 null 3.254508 ] """ @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def ewm_std( self, *, com: float | None = None, span: float | None = None, half_life: float | None = None, alpha: float | None = None, adjust: bool = True, bias: bool = False, min_samples: int = 1, ignore_nulls: bool = False, ) -> Series: r""" Compute exponentially-weighted moving standard deviation. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`. Parameters ---------- com Specify decay in terms of center of mass, :math:`\gamma`, with .. math:: \alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0 span Specify decay in terms of span, :math:`\theta`, with ..
math:: \alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1 half_life Specify decay in terms of half-life, :math:`\lambda`, with .. math:: \alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \lambda } \right\} \; \forall \; \lambda > 0 alpha Specify smoothing factor alpha directly, :math:`0 < \alpha \leq 1`. adjust Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings - When `adjust=True` (the default) the EW function is calculated using weights :math:`w_i = (1 - \alpha)^i` - When `adjust=False` the EW function is calculated recursively by .. math:: y_0 &= x_0 \\ y_t &= (1 - \alpha)y_{t - 1} + \alpha x_t bias When `bias=False`, apply a correction to make the estimate statistically unbiased. min_samples Minimum number of observations in window required to have a value (otherwise result is null). ignore_nulls Ignore missing values when calculating weights. - When `ignore_nulls=False` (default), weights are based on absolute positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of [:math:`x_0`, None, :math:`x_2`] are :math:`(1-\alpha)^2` and :math:`1` if `adjust=True`, and :math:`(1-\alpha)^2` and :math:`\alpha` if `adjust=False`. - When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if `adjust=True`, and :math:`1-\alpha` and :math:`\alpha` if `adjust=False`. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.ewm_std(com=1, ignore_nulls=False) shape: (3,) Series: 'a' [f64] [ 0.0 0.707107 0.963624 ] """ @deprecate_renamed_parameter("min_periods", "min_samples", version="1.21.0") def ewm_var( self, *, com: float | None = None, span: float | None = None, half_life: float | None = None, alpha: float | None = None, adjust: bool = True, bias: bool = False, min_samples: int = 1, ignore_nulls: bool = False, ) -> Series: r""" Compute exponentially-weighted moving variance. .. versionchanged:: 1.21.0 The `min_periods` parameter was renamed `min_samples`. Parameters ---------- com Specify decay in terms of center of mass, :math:`\gamma`, with .. math:: \alpha = \frac{1}{1 + \gamma} \; \forall \; \gamma \geq 0 span Specify decay in terms of span, :math:`\theta`, with .. math:: \alpha = \frac{2}{\theta + 1} \; \forall \; \theta \geq 1 half_life Specify decay in terms of half-life, :math:`\lambda`, with .. math:: \alpha = 1 - \exp \left\{ \frac{ -\ln(2) }{ \lambda } \right\} \; \forall \; \lambda > 0 alpha Specify smoothing factor alpha directly, :math:`0 < \alpha \leq 1`. adjust Divide by decaying adjustment factor in beginning periods to account for imbalance in relative weightings - When `adjust=True` (the default) the EW function is calculated using weights :math:`w_i = (1 - \alpha)^i` - When `adjust=False` the EW function is calculated recursively by .. math:: y_0 &= x_0 \\ y_t &= (1 - \alpha)y_{t - 1} + \alpha x_t bias When `bias=False`, apply a correction to make the estimate statistically unbiased. min_samples Minimum number of observations in window required to have a value (otherwise result is null). ignore_nulls Ignore missing values when calculating weights. - When `ignore_nulls=False` (default), weights are based on absolute positions. 
For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of [:math:`x_0`, None, :math:`x_2`] are :math:`(1-\alpha)^2` and :math:`1` if `adjust=True`, and :math:`(1-\alpha)^2` and :math:`\alpha` if `adjust=False`. - When `ignore_nulls=True`, weights are based on relative positions. For example, the weights of :math:`x_0` and :math:`x_2` used in calculating the final weighted average of [:math:`x_0`, None, :math:`x_2`] are :math:`1-\alpha` and :math:`1` if `adjust=True`, and :math:`1-\alpha` and :math:`\alpha` if `adjust=False`. Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.ewm_var(com=1, ignore_nulls=False) shape: (3,) Series: 'a' [f64] [ 0.0 0.5 0.928571 ] """ def extend_constant(self, value: IntoExpr, n: int | IntoExprColumn) -> Series: """ Extremely fast method for extending the Series with 'n' copies of a value. Parameters ---------- value A constant literal value or a unit expression with which to extend the Series; can pass None to extend with nulls. n The number of additional values that will be added. Examples -------- >>> s = pl.Series([1, 2, 3]) >>> s.extend_constant(99, n=2) shape: (5,) Series: '' [i64] [ 1 2 3 99 99 ] """ def set_sorted(self, *, descending: bool = False) -> Self: """ Flags the Series as 'sorted'. Enables downstream code to use fast paths for sorted arrays. Parameters ---------- descending If the `Series` order is descending. Warnings -------- This can lead to incorrect results if this `Series` is not sorted! Use with care! Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.set_sorted().max() 3 """ return self._from_pyseries(self._s.set_sorted_flag(descending)) def new_from_index(self, index: int, length: int) -> Self: """ Create a new Series filled with values from the given index. Examples -------- >>> s = pl.Series("a", [1, 2, 3, 4, 5]) >>> s.new_from_index(1, 3) shape: (3,) Series: 'a' [i64] [ 2 2 2 ] """ return self._from_pyseries(self._s.new_from_index(index, length)) def shrink_dtype(self) -> Series: """ Shrink this Series to the minimal required data type. Shrink to the dtype needed to fit the extrema of this `Series`. This can be used to reduce memory pressure. Examples -------- >>> s = pl.Series("a", [1, 2, 3, 4, 5, 6]) >>> s shape: (6,) Series: 'a' [i64] [ 1 2 3 4 5 6 ] >>> s.shrink_dtype() shape: (6,) Series: 'a' [i8] [ 1 2 3 4 5 6 ] """ return wrap_s(self._s.shrink_dtype()) def get_chunks(self) -> list[Series]: """ Get the chunks of this Series as a list of Series. Examples -------- >>> s1 = pl.Series("a", [1, 2, 3]) >>> s2 = pl.Series("a", [4, 5, 6]) >>> s = pl.concat([s1, s2], rechunk=False) >>> s.get_chunks() [shape: (3,) Series: 'a' [i64] [ 1 2 3 ], shape: (3,) Series: 'a' [i64] [ 4 5 6 ]] """ return self._s.get_chunks() def implode(self) -> Self: """ Aggregate values into a list. The returned list itself is a scalar value of `list` dtype.
Examples -------- >>> s = pl.Series("a", [1, 2, 3]) >>> s.implode() shape: (1,) Series: 'a' [list[i64]] [ [1, 2, 3] ] """ def bitwise_count_ones(self) -> Self: """Evaluate the number of set bits.""" def bitwise_count_zeros(self) -> Self: """Evaluate the number of unset bits.""" def bitwise_leading_ones(self) -> Self: """Evaluate the number of most-significant set bits before seeing an unset bit.""" def bitwise_leading_zeros(self) -> Self: """Evaluate the number of most-significant unset bits before seeing a set bit.""" def bitwise_trailing_ones(self) -> Self: """Evaluate the number of least-significant set bits before seeing an unset bit.""" def bitwise_trailing_zeros(self) -> Self: """Evaluate the number of least-significant unset bits before seeing a set bit.""" def bitwise_and(self) -> PythonLiteral | None: """Perform an aggregation of bitwise ANDs.""" return self._s.bitwise_and() def bitwise_or(self) -> PythonLiteral | None: """Perform an aggregation of bitwise ORs.""" return self._s.bitwise_or() def bitwise_xor(self) -> PythonLiteral | None: """Perform an aggregation of bitwise XORs.""" return self._s.bitwise_xor() def first(self) -> PythonLiteral | None: """ Get the first element of the Series. Returns `None` if the Series is empty. """ return self._s.first() def last(self) -> PythonLiteral | None: """ Get the last element of the Series. Returns `None` if the Series is empty. """ return self._s.last() def approx_n_unique(self) -> PythonLiteral | None: """ Approximate count of unique values. This is done using the HyperLogLog++ algorithm for cardinality estimation. """ return self._s.approx_n_unique() def _row_encode( self, *, unordered: bool = False, descending: bool | None = None, nulls_last: bool | None = None, ) -> Series: """Encode to the row encoding.""" return ( self.to_frame() .select_seq( F.col(self.name)._row_encode( unordered=unordered, descending=descending, nulls_last=nulls_last ) ) .to_series() ) def _row_decode( self, names: Sequence[str], dtypes: Sequence[PolarsDataType], *, unordered: bool = False, descending: Sequence[bool] | None = None, nulls_last: Sequence[bool] | None = None, ) -> Series: """Decode from the row encoding.""" return ( self.to_frame() .select_seq( F.col(self.name)._row_decode( names, dtypes, unordered=unordered, descending=descending, nulls_last=nulls_last, ) ) .to_series() ) def repeat_by(self, by: int | IntoExprColumn) -> Self: """ Repeat the elements in this Series as specified in the given expression. The repeated elements are expanded into a List. Parameters ---------- by Numeric column that determines how often the values will be repeated. The column will be coerced to UInt32. Give this dtype to make the coercion a no-op. Returns ------- Series Series of data type List, where the inner data type is equal to the original data type.
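Examples -------- An illustrative sketch of the List output described above; `by` may be a constant or a column of per-element counts (outputs shown as expected): >>> s = pl.Series("a", [1, 2, 3]) >>> s.repeat_by(2) shape: (3,) Series: 'a' [list[i64]] [ [1, 1] [2, 2] [3, 3] ] >>> s.repeat_by(pl.Series([1, 2, 3])) shape: (3,) Series: 'a' [list[i64]] [ [1] [2, 2] [3, 3, 3] ]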
""" # Keep the `list` and `str` properties below at the end of the definition of Series, # as to not confuse mypy with the type annotation `str` and `list` @property def bin(self) -> BinaryNameSpace: """Create an object namespace of all binary related methods.""" return BinaryNameSpace(self) @property def cat(self) -> CatNameSpace: """Create an object namespace of all categorical related methods.""" return CatNameSpace(self) @property def dt(self) -> DateTimeNameSpace: """Create an object namespace of all datetime related methods.""" return DateTimeNameSpace(self) @property def list(self) -> ListNameSpace: """Create an object namespace of all list related methods.""" return ListNameSpace(self) @property def arr(self) -> ArrayNameSpace: """Create an object namespace of all array related methods.""" return ArrayNameSpace(self) @property def str(self) -> StringNameSpace: """Create an object namespace of all string related methods.""" return StringNameSpace(self) @property def struct(self) -> StructNameSpace: """Create an object namespace of all struct related methods.""" return StructNameSpace(self) @property @unstable() def plot(self) -> SeriesPlot: """ Create a plot namespace. .. warning:: This functionality is currently considered **unstable**. It may be changed at any point without it being considered a breaking change. .. versionchanged:: 1.6.0 In prior versions of Polars, HvPlot was the plotting backend. If you would like to restore the previous plotting functionality, all you need to do is add `import hvplot.polars` at the top of your script and replace `df.plot` with `df.hvplot`. Polars does not implement plotting logic itself, but instead defers to Altair: - `s.plot.hist(**kwargs)` is shorthand for `alt.Chart(s.to_frame()).mark_bar(tooltip=True).encode(x=alt.X(f'{s.name}:Q', bin=True), y='count()', **kwargs).interactive()` - `s.plot.kde(**kwargs)` is shorthand for `alt.Chart(s.to_frame()).transform_density(s.name, as_=[s.name, 'density']).mark_area(tooltip=True).encode(x=s.name, y='density:Q', **kwargs).interactive()` - for any other attribute `attr`, `s.plot.attr(**kwargs)` is shorthand for `alt.Chart(s.to_frame().with_row_index()).mark_attr(tooltip=True).encode(x='index', y=s.name, **kwargs).interactive()` For configuration, we suggest reading `Chart Configuration `_. For example, you can: - Change the width/height/title with ``.properties(width=500, height=350, title="My amazing plot")``. - Change the x-axis label rotation with ``.configure_axisX(labelAngle=30)``. - Change the opacity of the points in your scatter plot with ``.configure_point(opacity=.5)``. 
Examples -------- Histogram: >>> s = pl.Series([1, 4, 4, 6, 2, 4, 3, 5, 5, 7, 1]) >>> s.plot.hist() # doctest: +SKIP KDE plot: >>> s.plot.kde() # doctest: +SKIP Line plot: >>> s.plot.line() # doctest: +SKIP """ # noqa: W505 if not _ALTAIR_AVAILABLE or parse_version(altair.__version__) < (5, 4, 0): msg = "altair>=5.4.0 is required for `.plot`" raise ModuleUpgradeRequiredError(msg) return SeriesPlot(self) def _resolve_temporal_dtype( dtype: PolarsDataType | None, ndtype: np.dtype[np.datetime64] | np.dtype[np.timedelta64], ) -> PolarsDataType | None: """Given polars/numpy temporal dtypes, resolve to an explicit unit.""" PolarsType = Duration if ndtype.type == np.timedelta64 else Datetime if dtype is None or (dtype == Datetime and not getattr(dtype, "time_unit", None)): time_unit = getattr(dtype, "time_unit", None) or np.datetime_data(ndtype)[0] # explicit formulation is verbose, but keeps mypy happy # (and avoids unsupported timeunits such as "s") if time_unit == "ns": dtype = PolarsType("ns") elif time_unit == "us": dtype = PolarsType("us") elif time_unit == "ms": dtype = PolarsType("ms") elif time_unit == "D" and ndtype.type == np.datetime64: dtype = Date return dtype
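# A minimal illustration of `_resolve_temporal_dtype`, derived directly from the
# branches above (private helper; sketch only - `np` is the numpy module imported
# at the top of this file):
#
#     _resolve_temporal_dtype(None, np.dtype("datetime64[ms]"))   # -> Datetime("ms")
#     _resolve_temporal_dtype(None, np.dtype("datetime64[D]"))    # -> Date
#     _resolve_temporal_dtype(None, np.dtype("timedelta64[us]"))  # -> Duration("us")
#
# An unsupported unit such as "s" matches none of the branches, so the `dtype`
# argument (here `None`) is returned unchanged.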