DriverTrac/venv/lib/python3.12/site-packages/polars/functions/range/datetime_range.py

371 lines
12 KiB
Python

from __future__ import annotations
import contextlib
from typing import TYPE_CHECKING, overload
from polars import functions as F
from polars._utils.parse import parse_into_expression
from polars._utils.wrap import wrap_expr
from polars.functions.range._utils import parse_interval_argument
with contextlib.suppress(ImportError): # Module not available when building docs
import polars._plr as plr
if TYPE_CHECKING:
from datetime import date, datetime, timedelta
from typing import Literal
from polars import Expr, Series
from polars._typing import ClosedInterval, IntoExprColumn, TimeUnit
# Typing-only overloads: the static return type of `datetime_range` is
# selected by the `eager` keyword argument.


# `eager` omitted or literally `False` -> lazy expression.
@overload
def datetime_range(
    start: datetime | date | IntoExprColumn,
    end: datetime | date | IntoExprColumn,
    interval: str | timedelta = ...,
    *,
    closed: ClosedInterval = ...,
    time_unit: TimeUnit | None = ...,
    time_zone: str | None = ...,
    eager: Literal[False] = ...,
) -> Expr: ...


# `eager` literally `True` -> materialized Series.
@overload
def datetime_range(
    start: datetime | date | IntoExprColumn,
    end: datetime | date | IntoExprColumn,
    interval: str | timedelta = ...,
    *,
    closed: ClosedInterval = ...,
    time_unit: TimeUnit | None = ...,
    time_zone: str | None = ...,
    eager: Literal[True],
) -> Series: ...


# `eager` is a plain `bool` only known at runtime -> either result type.
@overload
def datetime_range(
    start: datetime | date | IntoExprColumn,
    end: datetime | date | IntoExprColumn,
    interval: str | timedelta = ...,
    *,
    closed: ClosedInterval = ...,
    time_unit: TimeUnit | None = ...,
    time_zone: str | None = ...,
    eager: bool,
) -> Series | Expr: ...
def datetime_range(
    start: datetime | date | IntoExprColumn,
    end: datetime | date | IntoExprColumn,
    interval: str | timedelta = "1d",
    *,
    closed: ClosedInterval = "both",
    time_unit: TimeUnit | None = None,
    time_zone: str | None = None,
    eager: bool = False,
) -> Series | Expr:
    """
    Build a range of datetimes between two bounds.

    Parameters
    ----------
    start
        First bound (lower end) of the range.
    end
        Last bound (upper end) of the range.
    interval
        Step between consecutive values. Accepts a Python `timedelta` or a
        string in the Polars duration language (see the "Notes" section).
    closed : {'both', 'left', 'right', 'none'}
        Which of the two bounds are inclusive.
    time_unit : {None, 'ns', 'us', 'ms'}
        Time unit of the resulting `Datetime` data type. When left as `None`
        and `interval` contains a nanosecond component, `'ns'` is used.
    time_zone
        Time zone of the resulting `Datetime` data type.
    eager
        If `True`, evaluate right away and return a `Series`.
        If `False` (the default), return an expression.

    Returns
    -------
    Expr or Series
        Column of data type :class:`Datetime`.

    See Also
    --------
    datetime_ranges
    date_range

    Notes
    -----
    `interval` is created according to the following string language:

    - 1ns   (1 nanosecond)
    - 1us   (1 microsecond)
    - 1ms   (1 millisecond)
    - 1s    (1 second)
    - 1m    (1 minute)
    - 1h    (1 hour)
    - 1d    (1 calendar day)
    - 1w    (1 calendar week)
    - 1mo   (1 calendar month)
    - 1q    (1 calendar quarter)
    - 1y    (1 calendar year)

    Or combine them:
    "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

    By "calendar day", we mean the corresponding time on the next day (which may
    not be 24 hours, due to daylight savings). Similarly for "calendar week",
    "calendar month", "calendar quarter", and "calendar year".

    Examples
    --------
    Interval given as a Polars duration string:

    >>> from datetime import datetime
    >>> pl.datetime_range(
    ...     datetime(2022, 1, 1), datetime(2022, 3, 1), "1mo", eager=True
    ... ).alias("datetime")
    shape: (3,)
    Series: 'datetime' [datetime[μs]]
    [
        2022-01-01 00:00:00
        2022-02-01 00:00:00
        2022-03-01 00:00:00
    ]

    Interval given as a `timedelta` object:

    >>> from datetime import date, timedelta
    >>> pl.datetime_range(
    ...     date(1985, 1, 1),
    ...     date(1985, 1, 10),
    ...     timedelta(days=1, hours=12),
    ...     time_unit="ms",
    ...     eager=True,
    ... ).alias("datetime")
    shape: (7,)
    Series: 'datetime' [datetime[ms]]
    [
        1985-01-01 00:00:00
        1985-01-02 12:00:00
        1985-01-04 00:00:00
        1985-01-05 12:00:00
        1985-01-07 00:00:00
        1985-01-08 12:00:00
        1985-01-10 00:00:00
    ]

    With a time zone:

    >>> pl.datetime_range(
    ...     datetime(2022, 1, 1),
    ...     datetime(2022, 3, 1),
    ...     "1mo",
    ...     time_zone="America/New_York",
    ...     eager=True,
    ... ).alias("datetime")
    shape: (3,)
    Series: 'datetime' [datetime[μs, America/New_York]]
    [
        2022-01-01 00:00:00 EST
        2022-02-01 00:00:00 EST
        2022-03-01 00:00:00 EST
    ]

    Leave out `eager=True` to use `datetime_range` as an expression:

    >>> df = pl.DataFrame(
    ...     {
    ...         "date": [
    ...             date(2024, 1, 1),
    ...             date(2024, 1, 2),
    ...             date(2024, 1, 1),
    ...             date(2024, 1, 3),
    ...         ],
    ...         "key": ["one", "one", "two", "two"],
    ...     }
    ... )
    >>> result = (
    ...     df.group_by("key")
    ...     .agg(pl.datetime_range(pl.col("date").min(), pl.col("date").max()))
    ...     .sort("key")
    ... )
    >>> with pl.Config(fmt_str_lengths=70):
    ...     print(result)
    shape: (2, 2)
    ┌─────┬─────────────────────────────────────────────────────────────────┐
    │ key ┆ date                                                            │
    │ --- ┆ ---                                                             │
    │ str ┆ list[datetime[μs]]                                              │
    ╞═════╪═════════════════════════════════════════════════════════════════╡
    │ one ┆ [2024-01-01 00:00:00, 2024-01-02 00:00:00]                      │
    │ two ┆ [2024-01-01 00:00:00, 2024-01-02 00:00:00, 2024-01-03 00:00:00] │
    └─────┴─────────────────────────────────────────────────────────────────┘
    """
    # Normalize the interval first; the unit check below searches its result.
    step = parse_interval_argument(interval)

    # An explicit `time_unit` always wins. Only when the caller gave none and
    # the step mentions nanoseconds is the unit switched to "ns".
    resolution = "ns" if time_unit is None and "ns" in step else time_unit

    lower_bound = parse_into_expression(start)
    upper_bound = parse_into_expression(end)
    range_expr = wrap_expr(
        plr.datetime_range(
            lower_bound, upper_bound, step, closed, resolution, time_zone
        )
    )

    # Eager callers get a Series; everyone else gets the lazy expression.
    return F.select(range_expr).to_series() if eager else range_expr
# Typing-only overloads: the static return type of `datetime_ranges` is
# selected by the `eager` keyword argument.


# `eager` omitted or literally `False` -> lazy expression.
@overload
def datetime_ranges(
    start: datetime | date | IntoExprColumn,
    end: datetime | date | IntoExprColumn,
    interval: str | timedelta = ...,
    *,
    closed: ClosedInterval = ...,
    time_unit: TimeUnit | None = ...,
    time_zone: str | None = ...,
    eager: Literal[False] = ...,
) -> Expr: ...


# `eager` literally `True` -> materialized Series.
@overload
def datetime_ranges(
    start: datetime | date | IntoExprColumn,
    end: datetime | date | IntoExprColumn,
    interval: str | timedelta = ...,
    *,
    closed: ClosedInterval = ...,
    time_unit: TimeUnit | None = ...,
    time_zone: str | None = ...,
    eager: Literal[True],
) -> Series: ...


# `eager` is a plain `bool` only known at runtime -> either result type.
@overload
def datetime_ranges(
    start: datetime | date | IntoExprColumn,
    end: datetime | date | IntoExprColumn,
    interval: str | timedelta = ...,
    *,
    closed: ClosedInterval = ...,
    time_unit: TimeUnit | None = ...,
    time_zone: str | None = ...,
    eager: bool,
) -> Series | Expr: ...
def datetime_ranges(
    start: datetime | date | IntoExprColumn,
    end: datetime | date | IntoExprColumn,
    interval: str | timedelta = "1d",
    *,
    closed: ClosedInterval = "both",
    time_unit: TimeUnit | None = None,
    time_zone: str | None = None,
    eager: bool = False,
) -> Series | Expr:
    """
    Build a column in which every value is a datetime range.

    Parameters
    ----------
    start
        First bound (lower end) of each range.
    end
        Last bound (upper end) of each range.
    interval
        Step between consecutive values. Accepts a Python `timedelta` or a
        string in the Polars duration language (see the "Notes" section).
    closed : {'both', 'left', 'right', 'none'}
        Which of the two bounds are inclusive.
    time_unit : {None, 'ns', 'us', 'ms'}
        Time unit of the resulting `Datetime` data type. When left as `None`
        and `interval` contains a nanosecond component, `'ns'` is used.
    time_zone
        Time zone of the resulting `Datetime` data type.
    eager
        If `True`, evaluate right away and return a `Series`.
        If `False` (the default), return an expression.

    Returns
    -------
    Expr or Series
        Column of data type `List(Datetime)`.

    See Also
    --------
    datetime_range
    date_ranges

    Notes
    -----
    `interval` is created according to the following string language:

    - 1ns   (1 nanosecond)
    - 1us   (1 microsecond)
    - 1ms   (1 millisecond)
    - 1s    (1 second)
    - 1m    (1 minute)
    - 1h    (1 hour)
    - 1d    (1 calendar day)
    - 1w    (1 calendar week)
    - 1mo   (1 calendar month)
    - 1q    (1 calendar quarter)
    - 1y    (1 calendar year)

    Or combine them:
    "3d12h4m25s" # 3 days, 12 hours, 4 minutes, and 25 seconds

    By "calendar day", we mean the corresponding time on the next day (which may
    not be 24 hours, due to daylight savings). Similarly for "calendar week",
    "calendar month", "calendar quarter", and "calendar year".

    Examples
    --------
    >>> from datetime import datetime
    >>> df = pl.DataFrame(
    ...     {
    ...         "start": [datetime(2022, 1, 1), datetime(2022, 1, 2)],
    ...         "end": datetime(2022, 1, 3),
    ...     }
    ... )
    >>> with pl.Config(fmt_str_lengths=100):
    ...     df.select(datetime_range=pl.datetime_ranges("start", "end"))
    shape: (2, 1)
    ┌─────────────────────────────────────────────────────────────────┐
    │ datetime_range                                                  │
    │ ---                                                             │
    │ list[datetime[μs]]                                              │
    ╞═════════════════════════════════════════════════════════════════╡
    │ [2022-01-01 00:00:00, 2022-01-02 00:00:00, 2022-01-03 00:00:00] │
    │ [2022-01-02 00:00:00, 2022-01-03 00:00:00]                      │
    └─────────────────────────────────────────────────────────────────┘
    """
    # Normalize the interval first; the unit check below searches its result.
    step = parse_interval_argument(interval)

    # An explicit `time_unit` always wins. Only when the caller gave none and
    # the step mentions nanoseconds is the unit switched to "ns".
    resolution = "ns" if time_unit is None and "ns" in step else time_unit

    lower_bound = parse_into_expression(start)
    upper_bound = parse_into_expression(end)
    ranges_expr = wrap_expr(
        plr.datetime_ranges(
            lower_bound, upper_bound, step, closed, resolution, time_zone
        )
    )

    # Eager callers get a Series; everyone else gets the lazy expression.
    return F.select(ranges_expr).to_series() if eager else ranges_expr