# DriverTrac/venv/lib/python3.12/site-packages/polars/_cpu_check.py
#
# 266 lines
# 9.5 KiB
# Python

# Vendored parts of the code from https://github.com/flababah/cpuid.py,
# so we replicate its copyright license.
# Copyright (c) 2014 Anders Høst
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of
# this software and associated documentation files (the "Software"), to deal in
# the Software without restriction, including without limitation the rights to
# use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
# FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
# COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
from __future__ import annotations
import ctypes
import os
from ctypes import CFUNCTYPE, POINTER, c_long, c_size_t, c_uint32, c_ulong, c_void_p
from typing import ClassVar
"""
Determine whether Polars can be run on the current CPU.
This must be done in pure Python, before the Polars binary is imported. If we
were to try it on the Rust side the compiler could emit illegal instructions
before/during the CPU feature check code.
"""
# True when os.name is "nt"; used below to pick the Windows allocation path
# (VirtualAlloc/VirtualFree) and the Windows opcode table.
_IS_WINDOWS = os.name == "nt"
# True when a C pointer is 8 bytes wide in this interpreter, i.e. the Python
# process itself is 64-bit; used below to pick a 64-bit or 32-bit opcode table.
_IS_64BIT = ctypes.sizeof(ctypes.c_void_p) == 8
def get_runtime_repr() -> str:
    """Return the ``RUNTIME_REPR`` attribute of the ``polars._plr`` module.

    ``polars._plr`` is imported inside the function rather than at module
    level, so merely importing this file does not pull it in.
    """
    import polars._plr as plr

    runtime_repr = plr.RUNTIME_REPR
    return runtime_repr
def _open_posix_libc() -> ctypes.CDLL:
    """Load the C standard library on a POSIX system.

    The well-known library name for the platform is tried first
    (``libc.dylib`` when ``os.uname().sysname`` is ``"Darwin"``, otherwise
    ``libc.so.6``). If anything in that attempt raises, the name is looked up
    through ``ctypes.util.find_library`` instead.

    Returns:
        A ``ctypes.CDLL`` handle opened with ``use_errno=True``.
    """
    try:
        is_darwin = os.uname().sysname == "Darwin"
        libc_name = "libc.dylib" if is_darwin else "libc.so.6"
        return ctypes.CDLL(libc_name, use_errno=True)
    except Exception:
        # ctypes.util is only imported on this fallback path, to avoid the
        # import when the direct load works.
        from ctypes import util as ctutil

        located = ctutil.find_library("c")
        return ctypes.CDLL(located, use_errno=True)
# Posix x86_64:
# Three first call registers : RDI, RSI, RDX
# Volatile registers : RAX, RCX, RDX, RSI, RDI, R8-11
# Windows x86_64:
# Three first call registers : RCX, RDX, R8
# Volatile registers : RAX, RCX, RDX, R8-11
# cdecl 32 bit:
# Three first call registers : Stack (%esp)
# Volatile registers : EAX, ECX, EDX
# fmt: off
# Machine code for the CPUID helper on POSIX x86_64.
# Per the register notes above, the three arguments arrive in RDI, RSI, RDX:
#   RDI = pointer to the 4 x uint32 output struct (eax, ebx, ecx, edx)
#   ESI = value loaded into EAX before `cpuid` (the leaf)
#   EDX = value loaded into ECX before `cpuid` (the sub-leaf)
# RBX is pushed/popped around the body because `cpuid` writes EBX and RBX is
# not in the volatile-register list above.
_POSIX_64_OPC = [
0x53, # push %rbx
0x89, 0xf0, # mov %esi,%eax
0x89, 0xd1, # mov %edx,%ecx
0x0f, 0xa2, # cpuid
0x89, 0x07, # mov %eax,(%rdi)
0x89, 0x5f, 0x04, # mov %ebx,0x4(%rdi)
0x89, 0x4f, 0x08, # mov %ecx,0x8(%rdi)
0x89, 0x57, 0x0c, # mov %edx,0xc(%rdi)
0x5b, # pop %rbx
0xc3 # retq
]
# Machine code for the CPUID helper on Windows x86_64.
# Per the register notes above, the three arguments arrive in RCX, RDX, R8:
#   RCX = pointer to the 4 x uint32 output struct (eax, ebx, ecx, edx)
#   EDX = value loaded into EAX before `cpuid` (the leaf)
#   R8D = value loaded into ECX before `cpuid` (the sub-leaf)
# The output pointer is first copied from RCX to R9, since ECX is needed as a
# `cpuid` input and is overwritten by its output. RBX is pushed/popped because
# `cpuid` writes EBX and RBX is not in the volatile-register list above.
_WINDOWS_64_OPC = [
0x53, # push %rbx
0x89, 0xd0, # mov %edx,%eax
0x49, 0x89, 0xc9, # mov %rcx,%r9
0x44, 0x89, 0xc1, # mov %r8d,%ecx
0x0f, 0xa2, # cpuid
0x41, 0x89, 0x01, # mov %eax,(%r9)
0x41, 0x89, 0x59, 0x04, # mov %ebx,0x4(%r9)
0x41, 0x89, 0x49, 0x08, # mov %ecx,0x8(%r9)
0x41, 0x89, 0x51, 0x0c, # mov %edx,0xc(%r9)
0x5b, # pop %rbx
0xc3 # retq
]
# Machine code for the CPUID helper under the 32-bit cdecl convention, where
# all arguments are passed on the stack. After the two pushes below (8 bytes)
# plus the return address (4 bytes), the arguments sit at:
#   0x0c(%esp) = pointer to the 4 x uint32 output struct (loaded into EDI)
#   0x10(%esp) = value loaded into EAX before `cpuid` (the leaf)
#   0x14(%esp) = value loaded into ECX before `cpuid` (the sub-leaf)
# EBX and EDI are saved/restored because they are written here and are not in
# the cdecl volatile-register list above.
_CDECL_32_OPC = [
0x53, # push %ebx
0x57, # push %edi
0x8b, 0x7c, 0x24, 0x0c, # mov 0xc(%esp),%edi
0x8b, 0x44, 0x24, 0x10, # mov 0x10(%esp),%eax
0x8b, 0x4c, 0x24, 0x14, # mov 0x14(%esp),%ecx
0x0f, 0xa2, # cpuid
0x89, 0x07, # mov %eax,(%edi)
0x89, 0x5f, 0x04, # mov %ebx,0x4(%edi)
0x89, 0x4f, 0x08, # mov %ecx,0x8(%edi)
0x89, 0x57, 0x0c, # mov %edx,0xc(%edi)
0x5f, # pop %edi
0x5b, # pop %ebx
0xc3 # ret
]
# fmt: on
# From memoryapi.h
# Allocation-type flags; CPUID.__init__ passes _MEM_COMMIT | _MEM_RESERVE to VirtualAlloc.
_MEM_COMMIT = 0x1000
_MEM_RESERVE = 0x2000
# Free-type flag; CPUID.__del__ passes it to VirtualFree.
_MEM_RELEASE = 0x8000
# Page-protection value passed to VirtualAlloc for the region holding the opcodes.
_PAGE_EXECUTE_READWRITE = 0x40
class CPUID_struct(ctypes.Structure):
    """Four consecutive 32-bit unsigned fields: ``eax``, ``ebx``, ``ecx``, ``edx``.

    The CPUID helper code stores the corresponding register values into a
    struct of this layout at offsets 0, 4, 8 and 12.
    """

    _fields_: ClassVar[list[tuple[str, type]]] = [
        ("eax", c_uint32),
        ("ebx", c_uint32),
        ("ecx", c_uint32),
        ("edx", c_uint32),
    ]
class CPUID:
    """Callable that executes the x86 ``cpuid`` instruction from Python.

    On construction, the opcode table matching the current platform and
    pointer width is copied into a freshly allocated memory region that is
    made executable (``VirtualAlloc`` on Windows; an anonymous ``mmap``
    followed by ``mprotect`` elsewhere). The region's address is then wrapped
    in a ctypes function pointer. Calling the instance runs that code and
    returns the resulting register values.

    Raises:
        MemoryError: On Windows, if ``VirtualAlloc`` returns a null address.
        RuntimeError: On non-Windows platforms, if ``mprotect`` returns
            non-zero.
    """

    def __init__(self) -> None:
        if _IS_WINDOWS:
            if _IS_64BIT:
                # VirtualAlloc seems to fail under some weird
                # circumstances when ctypes.windll.kernel32 is
                # used under 64 bit Python. CDLL fixes this.
                self.win = ctypes.CDLL("kernel32.dll")
                opc = _WINDOWS_64_OPC
            else:
                # Here ctypes.windll.kernel32 is needed to get the
                # right DLL. Otherwise it will fail when running
                # 32 bit Python on 64 bit Windows.
                self.win = ctypes.windll.kernel32  # type: ignore[attr-defined]
                opc = _CDECL_32_OPC
        else:
            opc = _POSIX_64_OPC if _IS_64BIT else _CDECL_32_OPC

        size = len(opc)
        code = (ctypes.c_ubyte * size)(*opc)

        if _IS_WINDOWS:
            # Declare the signature explicitly so the returned address is
            # handled as a pointer-sized value.
            self.win.VirtualAlloc.restype = c_void_p
            self.win.VirtualAlloc.argtypes = [
                ctypes.c_void_p,
                ctypes.c_size_t,
                ctypes.c_ulong,
                ctypes.c_ulong,
            ]
            self.addr = self.win.VirtualAlloc(
                None, size, _MEM_COMMIT | _MEM_RESERVE, _PAGE_EXECUTE_READWRITE
            )
            if not self.addr:
                msg = "could not allocate memory for CPUID check"
                raise MemoryError(msg)
            ctypes.memmove(self.addr, code, size)
        else:
            import mmap  # Only import if necessary.

            # On some platforms PROT_WRITE + PROT_EXEC is forbidden, so we first
            # only write and then mprotect into PROT_EXEC.
            libc = _open_posix_libc()
            mprotect = libc.mprotect
            mprotect.argtypes = (ctypes.c_void_p, ctypes.c_size_t, ctypes.c_int)
            mprotect.restype = ctypes.c_int
            # The mapping is stored on the instance so it stays referenced for
            # as long as the function pointer built from its address exists.
            self.mmap = mmap.mmap(
                -1,
                size,
                mmap.MAP_PRIVATE | mmap.MAP_ANONYMOUS,
                mmap.PROT_READ | mmap.PROT_WRITE,
            )
            self.addr = ctypes.addressof(ctypes.c_void_p.from_buffer(self.mmap))
            self.mmap.write(code)
            if mprotect(self.addr, size, mmap.PROT_READ | mmap.PROT_EXEC) != 0:
                msg = "could not execute mprotect for CPUID check"
                raise RuntimeError(msg)

        func_type = CFUNCTYPE(None, POINTER(CPUID_struct), c_uint32, c_uint32)
        self.func_ptr = func_type(self.addr)

    def __call__(self, eax: int, ecx: int = 0) -> CPUID_struct:
        """Run ``cpuid`` with the given EAX (leaf) and ECX (sub-leaf) inputs.

        Returns:
            A new ``CPUID_struct`` filled in by the helper code with the
            resulting eax/ebx/ecx/edx values.
        """
        struct = CPUID_struct()
        self.func_ptr(struct, eax, ecx)
        return struct

    def __del__(self) -> None:
        """Release the Windows allocation made in ``__init__``, if there is one."""
        if not _IS_WINDOWS:
            # Nothing is released explicitly here on other platforms; the
            # region belongs to ``self.mmap``.
            return

        # __del__ also runs when __init__ raised part-way through, in which
        # case ``win`` and/or ``addr`` may be missing or ``addr`` may be null.
        # Reading them defensively avoids an AttributeError in the finalizer
        # and a pointless VirtualFree call on a null address.
        win = getattr(self, "win", None)
        addr = getattr(self, "addr", None)
        if win is None or not addr:
            return

        win.VirtualFree.restype = c_long
        win.VirtualFree.argtypes = [c_void_p, c_size_t, c_ulong]
        win.VirtualFree(addr, 0, _MEM_RELEASE)
def _read_cpu_flags() -> dict[str, bool]:
    """Query the CPU for the instruction-set features this module knows about.

    Three CPUID leaves are read (1, 7 and 0x80000001) and individual bits are
    decoded into booleans. A leaf is only queried when the CPU reports it as
    supported; otherwise every feature taken from that leaf is reported as
    ``False``.

    Returns:
        A mapping from feature name (e.g. ``"avx2"``, ``"sse4.2"``) to whether
        the corresponding CPUID bit is set.
    """
    # CPU flags from https://en.wikipedia.org/wiki/CPUID
    cpuid = CPUID()

    # Leaf 0 and leaf 0x80000000 return, in EAX, the highest supported basic
    # and extended leaf respectively. Querying a leaf beyond those limits does
    # not reliably return zeros, so unrelated bits could be misread as feature
    # flags. Guard each query and fall back to an all-zero result.
    max_basic_leaf = cpuid(0, 0).eax
    max_extended_leaf = cpuid(0x80000000, 0).eax
    unsupported = CPUID_struct()  # ctypes zero-initialises all fields

    cpuid1 = cpuid(1, 0) if max_basic_leaf >= 1 else unsupported
    cpuid7 = cpuid(7, 0) if max_basic_leaf >= 7 else unsupported
    cpuid81h = (
        cpuid(0x80000001, 0) if max_extended_leaf >= 0x80000001 else unsupported
    )

    return {
        "sse3": bool(cpuid1.ecx & (1 << 0)),
        "ssse3": bool(cpuid1.ecx & (1 << 9)),
        "fma": bool(cpuid1.ecx & (1 << 12)),
        "cmpxchg16b": bool(cpuid1.ecx & (1 << 13)),
        "sse4.1": bool(cpuid1.ecx & (1 << 19)),
        "sse4.2": bool(cpuid1.ecx & (1 << 20)),
        "movbe": bool(cpuid1.ecx & (1 << 22)),
        "popcnt": bool(cpuid1.ecx & (1 << 23)),
        "pclmulqdq": bool(cpuid1.ecx & (1 << 1)),
        "avx": bool(cpuid1.ecx & (1 << 28)),
        "bmi1": bool(cpuid7.ebx & (1 << 3)),
        "bmi2": bool(cpuid7.ebx & (1 << 8)),
        "avx2": bool(cpuid7.ebx & (1 << 5)),
        "lzcnt": bool(cpuid81h.ecx & (1 << 5)),
    }
def check_cpu_flags(feature_flags: str) -> None:
    """Warn when the CPU lacks features listed in ``feature_flags``.

    Args:
        feature_flags: Comma-separated feature names, each optionally prefixed
            with ``+`` (for example ``"+avx2,+fma"``). An empty string disables
            the check.

    Raises:
        RuntimeError: If a listed name is not one of the features reported by
            ``_read_cpu_flags``.

    The check is skipped entirely when the ``POLARS_SKIP_CPU_CHECK`` environment
    variable is set to a non-empty value. Missing features produce a
    ``RuntimeWarning`` rather than an exception.
    """
    if os.environ.get("POLARS_SKIP_CPU_CHECK") or not feature_flags:
        return

    required = [token.lstrip("+") for token in feature_flags.split(",")]
    detected = _read_cpu_flags()

    undetected = []
    for name in required:
        if name not in detected:
            msg = f"unknown feature flag: {name!r}"
            raise RuntimeError(msg)
        if detected[name]:
            continue
        undetected.append(name)

    if not undetected:
        return

    import warnings  # Only import if necessary.

    warnings.warn(
        f"""Missing required CPU features.
The following required CPU features were not detected:
{", ".join(undetected)}
Continuing to use this version of Polars on this processor will likely result in a crash.
Install `polars[rtcompat]` instead of `polars` to run Polars with better compatibility.
Hint: If you are on an Apple ARM machine (e.g. M1) this is likely due to running Python under Rosetta.
It is recommended to install a native version of Python that does not run under Rosetta x86-64 emulation.
If you believe this warning to be a false positive, you can set the `POLARS_SKIP_CPU_CHECK` environment variable to bypass this check.
""",
        RuntimeWarning,
        stacklevel=1,
    )