# Vendored module: polars/_typing.py (site-packages, Python 3.12 venv under DriverTrac/)
# Snapshot metadata: 2025-11-28 09:08:33 +05:30 — 484 lines, 14 KiB, Python
from __future__ import annotations
from collections.abc import Collection, Iterable, Mapping, Sequence
from pathlib import Path
from typing import (
IO,
TYPE_CHECKING,
Any,
Callable,
Literal,
Protocol,
TypedDict,
TypeVar,
Union,
)
if TYPE_CHECKING:
import contextlib
import sys
from datetime import date, datetime, time, timedelta
from decimal import Decimal
from sqlalchemy.engine import Connection, Engine
from sqlalchemy.orm import Session
from polars import DataFrame, Expr, LazyFrame, Series
from polars._dependencies import numpy as np
from polars._dependencies import pandas as pd
from polars._dependencies import pyarrow as pa
from polars._dependencies import torch
from polars.datatypes import DataType, DataTypeClass, IntegerType, TemporalType
from polars.lazyframe.engine_config import GPUEngine
from polars.selectors import Selector
with contextlib.suppress(ImportError): # Module not available when building docs
from polars._plr import PyPartitioning
if sys.version_info >= (3, 10):
from typing import TypeAlias
else:
from typing_extensions import TypeAlias
class ArrowArrayExportable(Protocol):
    """
    Structural type for objects exporting data via the Arrow PyCapsule Interface.

    Any object that implements ``__arrow_c_array__`` (the Arrow C Data
    Interface entry point of the PyCapsule protocol) satisfies this protocol.
    """

    # See the Arrow PyCapsule Interface specification for the capsule contents.
    def __arrow_c_array__(
        self,
        requested_schema: object | None = None,
    ) -> tuple[object, object]: ...
class ArrowStreamExportable(Protocol):
    """
    Structural type for objects exporting a stream via the Arrow PyCapsule Interface.

    Satisfied by any object implementing ``__arrow_c_stream__``, the Arrow
    C Stream Interface entry point of the PyCapsule protocol.
    """

    def __arrow_c_stream__(self, requested_schema: object | None = None) -> object: ...
class ArrowSchemaExportable(Protocol):
    """
    Structural type for objects exporting a schema via the Arrow PyCapsule Interface.

    Satisfied by any object implementing ``__arrow_c_schema__``, the Arrow
    C Schema Interface entry point of the PyCapsule protocol.
    """

    def __arrow_c_schema__(self) -> object: ...
# Data types
# A Polars dtype, given either as a class (e.g. pl.Int64) or an instance.
PolarsDataType: TypeAlias = Union["DataTypeClass", "DataType"]
PolarsTemporalType: TypeAlias = Union[type["TemporalType"], "TemporalType"]
PolarsIntegerType: TypeAlias = Union[type["IntegerType"], "IntegerType"]
OneOrMoreDataTypes: TypeAlias = Union[PolarsDataType, Iterable[PolarsDataType]]
# Python builtin/stdlib types accepted where a dtype can be inferred from them.
PythonDataType: TypeAlias = Union[
type[int],
type[float],
type[bool],
type[str],
type["date"],
type["time"],
type["datetime"],
type["timedelta"],
type[list[Any]],
type[tuple[Any, ...]],
type[bytes],
type[object],
type["Decimal"],
type[None],
]
# Accepted schema inputs: a name->dtype mapping, or a sequence of names
# and/or (name, dtype) pairs; a None dtype means "infer".
SchemaDefinition: TypeAlias = Union[
Mapping[str, Union[PolarsDataType, PythonDataType, None]],
Sequence[Union[str, tuple[str, Union[PolarsDataType, PythonDataType, None]]]],
]
SchemaDict: TypeAlias = Mapping[str, PolarsDataType]
NumericLiteral: TypeAlias = Union[int, float, "Decimal"]
TemporalLiteral: TypeAlias = Union["date", "time", "datetime", "timedelta"]
NonNestedLiteral: TypeAlias = Union[NumericLiteral, TemporalLiteral, str, bool, bytes]
# Python literal types (can convert into a `lit` expression)
PythonLiteral: TypeAlias = Union[NonNestedLiteral, "np.ndarray[Any, Any]", list[Any]]
# Inputs that can convert into a `col` expression
IntoExprColumn: TypeAlias = Union["Expr", "Series", str]
# Inputs that can convert into an expression
IntoExpr: TypeAlias = Union[PythonLiteral, IntoExprColumn, None]
ComparisonOperator: TypeAlias = Literal["eq", "neq", "gt", "lt", "gt_eq", "lt_eq"]
# selector type, and related collection/sequence
SelectorType: TypeAlias = "Selector"
ColumnNameOrSelector: TypeAlias = Union[str, SelectorType]
# User-facing string literal types
# The following all have an equivalent Rust enum with the same name
Ambiguous: TypeAlias = Literal["earliest", "latest", "raise", "null"]
AvroCompression: TypeAlias = Literal["uncompressed", "snappy", "deflate"]
CsvQuoteStyle: TypeAlias = Literal["necessary", "always", "non_numeric", "never"]
CategoricalOrdering: TypeAlias = Literal["physical", "lexical"]
CsvEncoding: TypeAlias = Literal["utf8", "utf8-lossy"]
ColumnMapping: TypeAlias = tuple[
Literal["iceberg-column-mapping"],
# This is "pa.Schema". Not typed as that causes pyright strict type checking
# failures for users who don't have pyarrow-stubs installed.
Any,
]
DefaultFieldValues: TypeAlias = tuple[
Literal["iceberg"], dict[int, Union["Series", str]]
]
DeletionFiles: TypeAlias = tuple[
Literal["iceberg-position-delete"], dict[int, list[str]]
]
FillNullStrategy: TypeAlias = Literal[
"forward", "backward", "min", "max", "mean", "zero", "one"
]
FloatFmt: TypeAlias = Literal["full", "mixed"]
IndexOrder: TypeAlias = Literal["c", "fortran"]
IpcCompression: TypeAlias = Literal["uncompressed", "lz4", "zstd"]
JoinValidation: TypeAlias = Literal["m:m", "m:1", "1:m", "1:1"]
Label: TypeAlias = Literal["left", "right", "datapoint"]
MaintainOrderJoin: TypeAlias = Literal[
"none", "left", "right", "left_right", "right_left"
]
NonExistent: TypeAlias = Literal["raise", "null"]
NullBehavior: TypeAlias = Literal["ignore", "drop"]
ParallelStrategy: TypeAlias = Literal[
"auto", "columns", "row_groups", "prefiltered", "none"
]
ParquetCompression: TypeAlias = Literal[
"lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"
]
PivotAgg: TypeAlias = Literal[
"min", "max", "first", "last", "sum", "mean", "median", "len"
]
QuantileMethod: TypeAlias = Literal[
"nearest", "higher", "lower", "midpoint", "linear", "equiprobable"
]
RankMethod: TypeAlias = Literal["average", "min", "max", "dense", "ordinal", "random"]
Roll: TypeAlias = Literal["raise", "forward", "backward"]
RoundMode: TypeAlias = Literal["half_to_even", "half_away_from_zero"]
SerializationFormat: TypeAlias = Literal["binary", "json"]
Endianness: TypeAlias = Literal["little", "big"]
SizeUnit: TypeAlias = Literal[
"b",
"kb",
"mb",
"gb",
"tb",
"bytes",
"kilobytes",
"megabytes",
"gigabytes",
"terabytes",
]
StartBy: TypeAlias = Literal[
"window",
"datapoint",
"monday",
"tuesday",
"wednesday",
"thursday",
"friday",
"saturday",
"sunday",
]
SyncOnCloseMethod: TypeAlias = Literal["data", "all"]
TimeUnit: TypeAlias = Literal["ns", "us", "ms"]
UnicodeForm: TypeAlias = Literal["NFC", "NFKC", "NFD", "NFKD"]
UniqueKeepStrategy: TypeAlias = Literal["first", "last", "any", "none"]
UnstackDirection: TypeAlias = Literal["vertical", "horizontal"]
MapElementsStrategy: TypeAlias = Literal["thread_local", "threading"]
# The following have a Rust enum equivalent with a different name
AsofJoinStrategy: TypeAlias = Literal["backward", "forward", "nearest"]  # AsofStrategy
ClosedInterval: TypeAlias = Literal["left", "right", "both", "none"]  # ClosedWindow
InterpolationMethod: TypeAlias = Literal["linear", "nearest"]
JoinStrategy: TypeAlias = Literal[
"inner", "left", "right", "full", "semi", "anti", "cross", "outer"
]  # JoinType
ListToStructWidthStrategy: TypeAlias = Literal["first_non_null", "max_width"]
# The following have no equivalent on the Rust side
# Annotated with TypeAlias for consistency with the other aliases in this file.
ConcatMethod: TypeAlias = Literal[
"vertical",
"vertical_relaxed",
"diagonal",
"diagonal_relaxed",
"horizontal",
"align",
"align_full",
"align_inner",
"align_left",
"align_right",
]
CorrelationMethod: TypeAlias = Literal["pearson", "spearman"]
DbReadEngine: TypeAlias = Literal["adbc", "connectorx"]
DbWriteEngine: TypeAlias = Literal["sqlalchemy", "adbc"]
DbWriteMode: TypeAlias = Literal["replace", "append", "fail"]
EpochTimeUnit: TypeAlias = Literal["ns", "us", "ms", "s", "d"]
JaxExportType: TypeAlias = Literal["array", "dict"]
Orientation: TypeAlias = Literal["col", "row"]
SearchSortedSide: TypeAlias = Literal["any", "left", "right"]
TorchExportType: TypeAlias = Literal["tensor", "dataset", "dict"]
TransferEncoding: TypeAlias = Literal["hex", "base64"]
WindowMappingStrategy: TypeAlias = Literal["group_to_rows", "join", "explode"]
ExplainFormat: TypeAlias = Literal["plain", "tree"]
# type signature for allowed frame init
FrameInitTypes: TypeAlias = Union[
Mapping[str, Union[Sequence[object], Mapping[str, Sequence[object]], "Series"]],
Sequence[Any],
"np.ndarray[Any, Any]",
"pa.Table",
"pd.DataFrame",
"ArrowArrayExportable",
"ArrowStreamExportable",
"torch.Tensor",
]
# Excel IO
ColumnFormatDict: TypeAlias = Mapping[
# dict of colname(s) or selector(s) to format string or dict
Union[ColumnNameOrSelector, tuple[ColumnNameOrSelector, ...]],
Union[str, Mapping[str, str]],
]
ConditionalFormatDict: TypeAlias = Mapping[
# dict of colname(s) to str, dict, or sequence of str/dict
Union[ColumnNameOrSelector, Collection[str]],
Union[str, Union[Mapping[str, Any], Sequence[Union[str, Mapping[str, Any]]]]],
]
ColumnTotalsDefinition: TypeAlias = Union[
# dict of colname(s) to str, a collection of str, or a boolean
Mapping[Union[ColumnNameOrSelector, tuple[ColumnNameOrSelector]], str],
Sequence[str],
bool,
]
ColumnWidthsDefinition: TypeAlias = Union[
Mapping[ColumnNameOrSelector, Union[tuple[str, ...], int]], int
]
RowTotalsDefinition: TypeAlias = Union[
# dict of colname to str(s), a collection of str, or a boolean
Mapping[str, Union[str, Collection[str]]],
Collection[str],
bool,
]
# standard/named hypothesis profiles used for parametric testing
ParametricProfileNames: TypeAlias = Literal["fast", "balanced", "expensive"]
# typevars for core polars types
PolarsType = TypeVar("PolarsType", "DataFrame", "LazyFrame", "Series", "Expr")
FrameType = TypeVar("FrameType", "DataFrame", "LazyFrame")
# NOTE(review): presumably (pointer, offset, length) of a raw buffer — confirm
# against the buffer-interop helpers that consume this alias.
BufferInfo: TypeAlias = tuple[int, int, int]
# type alias for supported spreadsheet engines
ExcelSpreadsheetEngine: TypeAlias = Literal["calamine", "openpyxl", "xlsx2csv"]
class SeriesBuffers(TypedDict):
    """Underlying buffers of a Series."""

    # Physical values buffer of the Series.
    values: Series
    # Optional validity (null) mask; None when absent.
    validity: Series | None
    # Optional offsets buffer; None when absent.
    offsets: Series | None
# minimal protocol definitions that can reasonably represent
# an executable connection, cursor, or equivalent object
class BasicConnection(Protocol):
def cursor(self, *args: Any, **kwargs: Any) -> Any:
"""Return a cursor object."""
class BasicCursor(Protocol):
def execute(self, *args: Any, **kwargs: Any) -> Any:
"""Execute a query."""
class Cursor(BasicCursor):
def fetchall(self, *args: Any, **kwargs: Any) -> Any:
"""Fetch all results."""
def fetchmany(self, *args: Any, **kwargs: Any) -> Any:
"""Fetch results in batches."""
# SQLAlchemy connection-like objects (engine, connection, or ORM session).
AlchemyConnection: TypeAlias = Union["Connection", "Engine", "Session"]
ConnectionOrCursor: TypeAlias = Union[
BasicConnection, BasicCursor, Cursor, AlchemyConnection
]
# Annotations for `__getitem__` methods
SingleIndexSelector: TypeAlias = int
MultiIndexSelector: TypeAlias = Union[
slice,
range,
Sequence[int],
"Series",
"np.ndarray[Any, Any]",
]
SingleNameSelector: TypeAlias = str
MultiNameSelector: TypeAlias = Union[
slice,
Sequence[str],
"Series",
"np.ndarray[Any, Any]",
]
BooleanMask: TypeAlias = Union[
Sequence[bool],
"Series",
"np.ndarray[Any, Any]",
]
SingleColSelector: TypeAlias = Union[SingleIndexSelector, SingleNameSelector]
MultiColSelector: TypeAlias = Union[MultiIndexSelector, MultiNameSelector, BooleanMask]
# LazyFrame engine selection
EngineType: TypeAlias = Union[
Literal["auto", "in-memory", "streaming", "gpu"], "GPUEngine"
]
PlanStage: TypeAlias = Literal["ir", "physical"]
# Inputs accepted as a scannable file source: paths, open binary handles,
# in-memory bytes, or homogeneous lists thereof.
FileSource: TypeAlias = Union[
str,
Path,
IO[bytes],
bytes,
list[str],
list[Path],
list[IO[bytes]],
list[bytes],
]
# Annotated with TypeAlias for consistency with the other aliases in this file.
JSONEncoder: TypeAlias = Union[Callable[[Any], bytes], Callable[[Any], str]]
DeprecationType: TypeAlias = Literal[
"function",
"renamed_parameter",
"streaming_parameter",
"nonkeyword_arguments",
"parameter_as_multi_positional",
]
class PartitioningScheme:
    """Thin wrapper around a native ``PyPartitioning`` configuration object."""

    def __init__(self, py_partitioning: PyPartitioning) -> None:
        # Keep a handle on the native partitioning object; all state lives there.
        self._py_partitioning = py_partitioning

    @property
    def _base_path(self) -> str | None:
        # Delegates straight to the native object's base_path attribute.
        return self._py_partitioning.base_path
# Explicit public API of this module (kept sorted alphabetically).
# NOTE(review): some names defined in this file (e.g. ArrowSchemaExportable,
# RoundMode, PlanStage, ParquetMetadata) are not re-exported here — confirm
# that their omission is intentional.
__all__ = [
"Ambiguous",
"ArrowArrayExportable",
"ArrowStreamExportable",
"AsofJoinStrategy",
"AvroCompression",
"BooleanMask",
"BufferInfo",
"CategoricalOrdering",
"ClosedInterval",
"ColumnFormatDict",
"ColumnNameOrSelector",
"ColumnTotalsDefinition",
"ColumnWidthsDefinition",
"ComparisonOperator",
"ConcatMethod",
"ConditionalFormatDict",
"ConnectionOrCursor",
"CorrelationMethod",
"CsvEncoding",
"CsvQuoteStyle",
"Cursor",
"DbReadEngine",
"DbWriteEngine",
"DbWriteMode",
"DeprecationType",
"Endianness",
"EngineType",
"EpochTimeUnit",
"ExcelSpreadsheetEngine",
"ExplainFormat",
"FileSource",
"FillNullStrategy",
"FloatFmt",
"FrameInitTypes",
"FrameType",
"IndexOrder",
"InterpolationMethod",
"IntoExpr",
"IntoExprColumn",
"IpcCompression",
"JSONEncoder",
"JaxExportType",
"JoinStrategy",
"JoinValidation",
"Label",
"ListToStructWidthStrategy",
"MaintainOrderJoin",
"MapElementsStrategy",
"MultiColSelector",
"MultiIndexSelector",
"MultiNameSelector",
"NonExistent",
"NonNestedLiteral",
"NullBehavior",
"NumericLiteral",
"OneOrMoreDataTypes",
"Orientation",
"ParallelStrategy",
"ParametricProfileNames",
"ParquetCompression",
"PartitioningScheme",
"PivotAgg",
"PolarsDataType",
"PolarsIntegerType",
"PolarsTemporalType",
"PolarsType",
"PythonDataType",
"PythonLiteral",
"QuantileMethod",
"RankMethod",
"Roll",
"RowTotalsDefinition",
"SchemaDefinition",
"SchemaDict",
"SearchSortedSide",
"SelectorType",
"SerializationFormat",
"SeriesBuffers",
"SingleColSelector",
"SingleIndexSelector",
"SingleNameSelector",
"SizeUnit",
"StartBy",
"SyncOnCloseMethod",
"TemporalLiteral",
"TimeUnit",
"TorchExportType",
"TransferEncoding",
"UnicodeForm",
"UniqueKeepStrategy",
"UnstackDirection",
"WindowMappingStrategy",
]
class ParquetMetadataContext:
    """
    The context given when writing file-level parquet metadata.

    .. warning::
        This functionality is considered **experimental**. It may be removed or
        changed at any point without it being considered a breaking change.
    """

    #: The base64 encoded arrow schema that is going to be written into metadata.
    arrow_schema: str

    def __init__(self, *, arrow_schema: str) -> None:
        self.arrow_schema = arrow_schema
# Callback that derives extra parquet key/value metadata from the write context.
ParquetMetadataFn: TypeAlias = Callable[[ParquetMetadataContext], dict[str, str]]
# Either a static key/value metadata dict, or a callback producing one.
ParquetMetadata: TypeAlias = Union[dict[str, str], ParquetMetadataFn]