DriverTrac/venv/lib/python3.12/site-packages/polars/testing/parametric/strategies/legacy.py

170 lines
5.7 KiB
Python

from __future__ import annotations
from collections.abc import Sequence
from typing import TYPE_CHECKING, Any
import hypothesis.strategies as st
from hypothesis.errors import InvalidArgument
from polars._utils.deprecation import deprecated
from polars.datatypes import is_polars_dtype
from polars.testing.parametric.strategies.core import _COL_LIMIT, column
from polars.testing.parametric.strategies.data import lists
from polars.testing.parametric.strategies.dtype import _instantiate_dtype, dtypes
if TYPE_CHECKING:
import sys
from hypothesis.strategies import SearchStrategy
from polars._typing import OneOrMoreDataTypes, PolarsDataType
if sys.version_info >= (3, 13):
from warnings import deprecated
else:
from typing_extensions import deprecated # noqa: TC004
@deprecated(
"`columns` is deprecated; use `column` instead, "
"in conjunction with a list comprehension."
)
def columns(
cols: int | Sequence[str] | None = None,
*,
dtype: OneOrMoreDataTypes | None = None,
min_cols: int = 0,
max_cols: int = _COL_LIMIT,
unique: bool = False,
) -> list[column]:
"""
Define multiple columns for use with the @dataframes strategy.
.. deprecated:: 0.20.26
Use :class:`column` instead, in conjunction with a list comprehension.
.. warning::
This functionality is currently considered **unstable**. It may be
changed at any point without it being considered a breaking change.
Generate a fixed sequence of `column` objects suitable for passing to the
@dataframes strategy, or using standalone (note that this function is not itself
a strategy).
Notes
-----
Additional control is available by creating a sequence of columns explicitly,
using the `column` class (an especially useful option is to override the default
data-generating strategy for a given col/dtype).
Parameters
----------
cols : {int, [str]}, optional
integer number of cols to create, or explicit list of column names. if
omitted a random number of columns (between mincol and max_cols) are
created.
dtype : PolarsDataType, optional
a single dtype for all cols, or list of dtypes (the same length as `cols`).
if omitted, each generated column is assigned a random dtype.
min_cols : int, optional
if not passing an exact size, can set a minimum here (defaults to 0).
max_cols : int, optional
if not passing an exact size, can set a maximum value here (defaults to
MAX_COLS).
unique : bool, optional
indicate if the values generated for these columns should be unique
(per-column).
Examples
--------
>>> from polars.testing.parametric import columns, dataframes
>>> from hypothesis import given
>>> @given(dataframes(columns(["x", "y", "z"], unique=True))) # doctest: +SKIP
... def test_unique_xyz(df: pl.DataFrame) -> None:
... assert_something(df)
"""
# create/assign named columns
if cols is None:
cols = st.integers(min_value=min_cols, max_value=max_cols).example()
if isinstance(cols, int):
names: Sequence[str] = [f"col{n}" for n in range(cols)]
else:
names = cols
n_cols = len(names)
if dtype is None:
dtypes: Sequence[PolarsDataType | None] = [None] * n_cols
elif is_polars_dtype(dtype):
dtypes = [dtype] * n_cols
elif isinstance(dtype, Sequence):
if (n_dtypes := len(dtype)) != n_cols:
msg = f"given {n_dtypes} dtypes for {n_cols} names"
raise InvalidArgument(msg)
dtypes = dtype
else:
msg = f"{dtype!r} is not a valid polars datatype"
raise InvalidArgument(msg)
# init list of named/typed columns
return [column(name=nm, dtype=tp, unique=unique) for nm, tp in zip(names, dtypes)]
@deprecated("`create_list_strategy` is deprecated; use `lists` instead.")
def create_list_strategy(
inner_dtype: PolarsDataType | None = None,
*,
select_from: Sequence[Any] | None = None,
size: int | None = None,
min_size: int = 0,
max_size: int | None = None,
unique: bool = False,
) -> SearchStrategy[list[Any]]:
"""
Create a strategy for generating Polars :class:`List` data.
.. deprecated:: 0.20.26
Use :func:`lists` instead.
Parameters
----------
inner_dtype : PolarsDataType
type of the inner list elements (can also be another List).
select_from : list, optional
randomly select the innermost values from this list (otherwise
the default strategy associated with the innermost dtype is used).
size : int, optional
if set, generated lists will be of exactly this size (and
ignore the min_size/max_size params).
min_size : int, optional
set the minimum size of the generated lists (default: 0 if unset).
max_size : int, optional
set the maximum size of the generated lists (default: 3 if
min_size is unset or zero, otherwise 2x min_size).
unique : bool, optional
ensure that the generated lists contain unique values.
Examples
--------
Create a strategy that generates a list of i32 values:
>>> from polars.testing.parametric import create_list_strategy
>>> lst = create_list_strategy(inner_dtype=pl.Int32) # doctest: +SKIP
>>> lst.example() # doctest: +SKIP
[-11330, 24030, 116]
"""
if size is not None:
min_size = max_size = size
if inner_dtype is None:
inner_dtype = dtypes().example()
else:
inner_dtype = _instantiate_dtype(inner_dtype).example()
return lists(
inner_dtype,
select_from=select_from,
min_size=min_size,
max_size=max_size,
unique=unique,
)