from __future__ import annotations from enum import IntEnum from typing import ( TYPE_CHECKING, Any, ClassVar, Literal, Protocol, TypedDict, ) from polars._utils.unstable import issue_unstable_warning if TYPE_CHECKING: import sys from collections.abc import Iterable, Sequence from polars.interchange.buffer import PolarsBuffer from polars.interchange.column import PolarsColumn if sys.version_info >= (3, 10): from typing import TypeAlias else: from typing_extensions import TypeAlias class DlpackDeviceType(IntEnum): """Integer enum for device type codes matching DLPack.""" CPU = 1 CUDA = 2 CPU_PINNED = 3 OPENCL = 4 VULKAN = 7 METAL = 8 VPI = 9 ROCM = 10 class DtypeKind(IntEnum): """ Integer enum for data types. Attributes ---------- INT : int Matches to signed integer data type. UINT : int Matches to unsigned integer data type. FLOAT : int Matches to floating point data type. BOOL : int Matches to boolean data type. STRING : int Matches to string data type (UTF-8 encoded). DATETIME : int Matches to datetime data type. CATEGORICAL : int Matches to categorical data type. """ INT = 0 UINT = 1 FLOAT = 2 BOOL = 20 STRING = 21 # UTF-8 DATETIME = 22 CATEGORICAL = 23 Dtype: TypeAlias = tuple[DtypeKind, int, str, str] # see Column.dtype class ColumnNullType(IntEnum): """ Integer enum for null type representation. Attributes ---------- NON_NULLABLE : int Non-nullable column. USE_NAN : int Use explicit float NaN value. USE_SENTINEL : int Sentinel value besides NaN. USE_BITMASK : int The bit is set/unset representing a null on a certain position. USE_BYTEMASK : int The byte is set/unset representing a null on a certain position. """ NON_NULLABLE = 0 USE_NAN = 1 USE_SENTINEL = 2 USE_BITMASK = 3 USE_BYTEMASK = 4 class ColumnBuffers(TypedDict): """Buffers backing a column.""" # first element is a buffer containing the column data; # second element is the data buffer's associated dtype data: tuple[PolarsBuffer, Dtype] # first element is a buffer containing mask values indicating missing data; # second element is the mask value buffer's associated dtype. # None if the null representation is not a bit or byte mask validity: tuple[PolarsBuffer, Dtype] | None # first element is a buffer containing the offset values for # variable-size binary data (e.g., variable-length strings); # second element is the offsets buffer's associated dtype. # None if the data buffer does not have an associated offsets buffer offsets: tuple[PolarsBuffer, Dtype] | None class CategoricalDescription(TypedDict): """Description of a categorical column.""" # whether the ordering of dictionary indices is semantically meaningful is_ordered: bool # whether a dictionary-style mapping of categorical values to other objects exists is_dictionary: Literal[True] # Python-level only (e.g. `{int: str}`). # None if not a dictionary-style categorical. categories: PolarsColumn class Buffer(Protocol): """Interchange buffer object.""" @property def bufsize(self) -> int: """Buffer size in bytes.""" @property def ptr(self) -> int: """Pointer to start of the buffer as an integer.""" def __dlpack__(self) -> Any: """Represent this structure as DLPack interface.""" def __dlpack_device__(self) -> tuple[DlpackDeviceType, int | None]: """Device type and device ID for where the data in the buffer resides.""" class Column(Protocol): """Interchange column object.""" def size(self) -> int: """Size of the column in elements.""" @property def offset(self) -> int: """Offset of the first element with respect to the start of the underlying buffer.""" # noqa: W505 @property def dtype(self) -> Dtype: """Data type of the column.""" @property def describe_categorical(self) -> CategoricalDescription: """Description of the categorical data type of the column.""" @property def describe_null(self) -> tuple[ColumnNullType, Any]: """Description of the null representation the column uses.""" @property def null_count(self) -> int | None: """Number of null elements, if known.""" @property def metadata(self) -> dict[str, Any]: """The metadata for the column.""" def num_chunks(self) -> int: """Return the number of chunks the column consists of.""" def get_chunks(self, n_chunks: int | None = None) -> Iterable[Column]: """Return an iterator yielding the column chunks.""" def get_buffers(self) -> ColumnBuffers: """Return a dictionary containing the underlying buffers.""" class DataFrame(Protocol): """Interchange dataframe object.""" version: ClassVar[int] # Version of the protocol def __dataframe__( self, nan_as_null: bool = False, # noqa: FBT001 allow_copy: bool = True, # noqa: FBT001 ) -> DataFrame: """Convert to a dataframe object implementing the dataframe interchange protocol.""" # noqa: W505 @property def metadata(self) -> dict[str, Any]: """The metadata for the dataframe.""" def num_columns(self) -> int: """Return the number of columns in the dataframe.""" def num_rows(self) -> int | None: """Return the number of rows in the dataframe, if available.""" def num_chunks(self) -> int: """Return the number of chunks the dataframe consists of..""" def column_names(self) -> Iterable[str]: """Return the column names.""" def get_column(self, i: int) -> Column: """Return the column at the indicated position.""" def get_column_by_name(self, name: str) -> Column: """Return the column with the given name.""" def get_columns(self) -> Iterable[Column]: """Return an iterator yielding the columns.""" def select_columns(self, indices: Sequence[int]) -> DataFrame: """Create a new dataframe by selecting a subset of columns by index.""" def select_columns_by_name(self, names: Sequence[str]) -> DataFrame: """Create a new dataframe by selecting a subset of columns by name.""" def get_chunks(self, n_chunks: int | None = None) -> Iterable[DataFrame]: """Return an iterator yielding the chunks of the dataframe.""" class SupportsInterchange(Protocol): """Dataframe that supports conversion into an interchange dataframe object.""" def __dataframe__( self, nan_as_null: bool = False, # noqa: FBT001 allow_copy: bool = True, # noqa: FBT001 ) -> SupportsInterchange: """Convert to a dataframe object implementing the dataframe interchange protocol.""" # noqa: W505 class Endianness: """Enum indicating the byte-order of a data type.""" LITTLE = "<" BIG = ">" NATIVE = "=" NA = "|" class CopyNotAllowedError(RuntimeError): """Exception raised when a copy is required, but `allow_copy` is set to `False`.""" class CompatLevel: """Data structure compatibility level.""" _version: int def __init__(self) -> None: msg = "it is not allowed to create a CompatLevel object" raise TypeError(msg) @staticmethod def _with_version(version: int) -> CompatLevel: compat_level = CompatLevel.__new__(CompatLevel) compat_level._version = version return compat_level @staticmethod def _newest() -> CompatLevel: return CompatLevel._future1 # type: ignore[attr-defined] @staticmethod def newest() -> CompatLevel: """ Get the highest supported compatibility level. .. warning:: Highest compatibility level is considered **unstable**. It may be changed at any point without it being considered a breaking change. """ issue_unstable_warning( "using the highest compatibility level is considered unstable." ) return CompatLevel._newest() @staticmethod def oldest() -> CompatLevel: """Get the most compatible level.""" return CompatLevel._compatible # type: ignore[attr-defined] def __repr__(self) -> str: return f"<{self.__class__.__module__}.{self.__class__.__qualname__}: {self._version}>" CompatLevel._compatible = CompatLevel._with_version(0) # type: ignore[attr-defined] CompatLevel._future1 = CompatLevel._with_version(1) # type: ignore[attr-defined]