# _typing.py (10 KB)
from __future__ import annotations

from datetime import (
    datetime,
    timedelta,
    tzinfo,
)
from os import PathLike
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Dict,
    Hashable,
    Iterator,
    List,
    Literal,
    Mapping,
    Optional,
    Protocol,
    Sequence,
    Tuple,
    Type as type_t,
    TypeVar,
    Union,
)

import numpy as np
  27. # To prevent import cycles place any internal imports in the branch below
  28. # and use a string literal forward reference to it in subsequent types
  29. # https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
  30. if TYPE_CHECKING:
  31. import numpy.typing as npt
  32. from pandas._libs import (
  33. NaTType,
  34. Period,
  35. Timedelta,
  36. Timestamp,
  37. )
  38. from pandas._libs.tslibs import BaseOffset
  39. from pandas.core.dtypes.dtypes import ExtensionDtype
  40. from pandas import Interval
  41. from pandas.arrays import (
  42. DatetimeArray,
  43. TimedeltaArray,
  44. )
  45. from pandas.core.arrays.base import ExtensionArray
  46. from pandas.core.frame import DataFrame
  47. from pandas.core.generic import NDFrame
  48. from pandas.core.groupby.generic import (
  49. DataFrameGroupBy,
  50. GroupBy,
  51. SeriesGroupBy,
  52. )
  53. from pandas.core.indexes.base import Index
  54. from pandas.core.internals import (
  55. ArrayManager,
  56. BlockManager,
  57. SingleArrayManager,
  58. SingleBlockManager,
  59. )
  60. from pandas.core.resample import Resampler
  61. from pandas.core.series import Series
  62. from pandas.core.window.rolling import BaseWindow
  63. from pandas.io.formats.format import EngFormatter
  64. ScalarLike_co = Union[
  65. int,
  66. float,
  67. complex,
  68. str,
  69. bytes,
  70. np.generic,
  71. ]
  72. # numpy compatible types
  73. NumpyValueArrayLike = Union[ScalarLike_co, npt.ArrayLike]
  74. # Name "npt._ArrayLikeInt_co" is not defined [name-defined]
  75. NumpySorter = Optional[npt._ArrayLikeInt_co] # type: ignore[name-defined]
  76. else:
  77. npt: Any = None
  78. HashableT = TypeVar("HashableT", bound=Hashable)
  79. # array-like
  80. ArrayLike = Union["ExtensionArray", np.ndarray]
  81. AnyArrayLike = Union[ArrayLike, "Index", "Series"]
  82. TimeArrayLike = Union["DatetimeArray", "TimedeltaArray"]
  83. # scalars
  84. PythonScalar = Union[str, float, bool]
  85. DatetimeLikeScalar = Union["Period", "Timestamp", "Timedelta"]
  86. PandasScalar = Union["Period", "Timestamp", "Timedelta", "Interval"]
  87. Scalar = Union[PythonScalar, PandasScalar, np.datetime64, np.timedelta64, datetime]
  88. IntStrT = TypeVar("IntStrT", int, str)
  89. # timestamp and timedelta convertible types
  90. TimestampConvertibleTypes = Union[
  91. "Timestamp", datetime, np.datetime64, np.int64, float, str
  92. ]
  93. TimedeltaConvertibleTypes = Union[
  94. "Timedelta", timedelta, np.timedelta64, np.int64, float, str
  95. ]
  96. Timezone = Union[str, tzinfo]
  97. # NDFrameT is stricter and ensures that the same subclass of NDFrame always is
  98. # used. E.g. `def func(a: NDFrameT) -> NDFrameT: ...` means that if a
  99. # Series is passed into a function, a Series is always returned and if a DataFrame is
  100. # passed in, a DataFrame is always returned.
  101. NDFrameT = TypeVar("NDFrameT", bound="NDFrame")
  102. NumpyIndexT = TypeVar("NumpyIndexT", np.ndarray, "Index")
  103. AxisInt = int
  104. Axis = Union[AxisInt, Literal["index", "columns", "rows"]]
  105. IndexLabel = Union[Hashable, Sequence[Hashable]]
  106. Level = Hashable
  107. Shape = Tuple[int, ...]
  108. Suffixes = Tuple[Optional[str], Optional[str]]
  109. Ordered = Optional[bool]
  110. JSONSerializable = Optional[Union[PythonScalar, List, Dict]]
  111. Frequency = Union[str, "BaseOffset"]
  112. Axes = Union[AnyArrayLike, List, range]
  113. RandomState = Union[
  114. int,
  115. ArrayLike,
  116. np.random.Generator,
  117. np.random.BitGenerator,
  118. np.random.RandomState,
  119. ]
  120. # dtypes
  121. NpDtype = Union[str, np.dtype, type_t[Union[str, complex, bool, object]]]
  122. Dtype = Union["ExtensionDtype", NpDtype]
  123. AstypeArg = Union["ExtensionDtype", "npt.DTypeLike"]
  124. # DtypeArg specifies all allowable dtypes in a functions its dtype argument
  125. DtypeArg = Union[Dtype, Dict[Hashable, Dtype]]
  126. DtypeObj = Union[np.dtype, "ExtensionDtype"]
  127. # converters
  128. ConvertersArg = Dict[Hashable, Callable[[Dtype], Dtype]]
  129. # parse_dates
  130. ParseDatesArg = Union[
  131. bool, List[Hashable], List[List[Hashable]], Dict[Hashable, List[Hashable]]
  132. ]
  133. # For functions like rename that convert one label to another
  134. Renamer = Union[Mapping[Any, Hashable], Callable[[Any], Hashable]]
  135. # to maintain type information across generic functions and parametrization
  136. T = TypeVar("T")
  137. # used in decorators to preserve the signature of the function it decorates
  138. # see https://mypy.readthedocs.io/en/stable/generics.html#declaring-decorators
  139. FuncType = Callable[..., Any]
  140. F = TypeVar("F", bound=FuncType)
  141. # types of vectorized key functions for DataFrame::sort_values and
  142. # DataFrame::sort_index, among others
  143. ValueKeyFunc = Optional[Callable[["Series"], Union["Series", AnyArrayLike]]]
  144. IndexKeyFunc = Optional[Callable[["Index"], Union["Index", AnyArrayLike]]]
  145. # types of `func` kwarg for DataFrame.aggregate and Series.aggregate
  146. AggFuncTypeBase = Union[Callable, str]
  147. AggFuncTypeDict = Dict[Hashable, Union[AggFuncTypeBase, List[AggFuncTypeBase]]]
  148. AggFuncType = Union[
  149. AggFuncTypeBase,
  150. List[AggFuncTypeBase],
  151. AggFuncTypeDict,
  152. ]
  153. AggObjType = Union[
  154. "Series",
  155. "DataFrame",
  156. "GroupBy",
  157. "SeriesGroupBy",
  158. "DataFrameGroupBy",
  159. "BaseWindow",
  160. "Resampler",
  161. ]
  162. PythonFuncType = Callable[[Any], Any]
  163. # filenames and file-like-objects
  164. AnyStr_co = TypeVar("AnyStr_co", str, bytes, covariant=True)
  165. AnyStr_contra = TypeVar("AnyStr_contra", str, bytes, contravariant=True)
  166. class BaseBuffer(Protocol):
  167. @property
  168. def mode(self) -> str:
  169. # for _get_filepath_or_buffer
  170. ...
  171. def seek(self, __offset: int, __whence: int = ...) -> int:
  172. # with one argument: gzip.GzipFile, bz2.BZ2File
  173. # with two arguments: zip.ZipFile, read_sas
  174. ...
  175. def seekable(self) -> bool:
  176. # for bz2.BZ2File
  177. ...
  178. def tell(self) -> int:
  179. # for zip.ZipFile, read_stata, to_stata
  180. ...
  181. class ReadBuffer(BaseBuffer, Protocol[AnyStr_co]):
  182. def read(self, __n: int = ...) -> AnyStr_co:
  183. # for BytesIOWrapper, gzip.GzipFile, bz2.BZ2File
  184. ...
  185. class WriteBuffer(BaseBuffer, Protocol[AnyStr_contra]):
  186. def write(self, __b: AnyStr_contra) -> Any:
  187. # for gzip.GzipFile, bz2.BZ2File
  188. ...
  189. def flush(self) -> Any:
  190. # for gzip.GzipFile, bz2.BZ2File
  191. ...
  192. class ReadPickleBuffer(ReadBuffer[bytes], Protocol):
  193. def readline(self) -> bytes:
  194. ...
  195. class WriteExcelBuffer(WriteBuffer[bytes], Protocol):
  196. def truncate(self, size: int | None = ...) -> int:
  197. ...
  198. class ReadCsvBuffer(ReadBuffer[AnyStr_co], Protocol):
  199. def __iter__(self) -> Iterator[AnyStr_co]:
  200. # for engine=python
  201. ...
  202. def fileno(self) -> int:
  203. # for _MMapWrapper
  204. ...
  205. def readline(self) -> AnyStr_co:
  206. # for engine=python
  207. ...
  208. @property
  209. def closed(self) -> bool:
  210. # for enine=pyarrow
  211. ...
  212. FilePath = Union[str, "PathLike[str]"]
  213. # for arbitrary kwargs passed during reading/writing files
  214. StorageOptions = Optional[Dict[str, Any]]
  215. # compression keywords and compression
  216. CompressionDict = Dict[str, Any]
  217. CompressionOptions = Optional[
  218. Union[Literal["infer", "gzip", "bz2", "zip", "xz", "zstd", "tar"], CompressionDict]
  219. ]
  220. # types in DataFrameFormatter
  221. FormattersType = Union[
  222. List[Callable], Tuple[Callable, ...], Mapping[Union[str, int], Callable]
  223. ]
  224. ColspaceType = Mapping[Hashable, Union[str, int]]
  225. FloatFormatType = Union[str, Callable, "EngFormatter"]
  226. ColspaceArgType = Union[
  227. str, int, Sequence[Union[str, int]], Mapping[Hashable, Union[str, int]]
  228. ]
  229. # Arguments for fillna()
  230. FillnaOptions = Literal["backfill", "bfill", "ffill", "pad"]
  231. # internals
  232. Manager = Union[
  233. "ArrayManager", "SingleArrayManager", "BlockManager", "SingleBlockManager"
  234. ]
  235. SingleManager = Union["SingleArrayManager", "SingleBlockManager"]
  236. Manager2D = Union["ArrayManager", "BlockManager"]
  237. # indexing
  238. # PositionalIndexer -> valid 1D positional indexer, e.g. can pass
  239. # to ndarray.__getitem__
  240. # ScalarIndexer is for a single value as the index
  241. # SequenceIndexer is for list like or slices (but not tuples)
  242. # PositionalIndexerTuple is extends the PositionalIndexer for 2D arrays
  243. # These are used in various __getitem__ overloads
  244. # TODO(typing#684): add Ellipsis, see
  245. # https://github.com/python/typing/issues/684#issuecomment-548203158
  246. # https://bugs.python.org/issue41810
  247. # Using List[int] here rather than Sequence[int] to disallow tuples.
  248. ScalarIndexer = Union[int, np.integer]
  249. SequenceIndexer = Union[slice, List[int], np.ndarray]
  250. PositionalIndexer = Union[ScalarIndexer, SequenceIndexer]
  251. PositionalIndexerTuple = Tuple[PositionalIndexer, PositionalIndexer]
  252. PositionalIndexer2D = Union[PositionalIndexer, PositionalIndexerTuple]
  253. if TYPE_CHECKING:
  254. TakeIndexer = Union[Sequence[int], Sequence[np.integer], npt.NDArray[np.integer]]
  255. else:
  256. TakeIndexer = Any
  257. # Shared by functions such as drop and astype
  258. IgnoreRaise = Literal["ignore", "raise"]
  259. # Windowing rank methods
  260. WindowingRankType = Literal["average", "min", "max"]
  261. # read_csv engines
  262. CSVEngine = Literal["c", "python", "pyarrow", "python-fwf"]
  263. # read_json engines
  264. JSONEngine = Literal["ujson", "pyarrow"]
  265. # read_xml parsers
  266. XMLParsers = Literal["lxml", "etree"]
  267. # Interval closed type
  268. IntervalLeftRight = Literal["left", "right"]
  269. IntervalClosedType = Union[IntervalLeftRight, Literal["both", "neither"]]
  270. # datetime and NaTType
  271. DatetimeNaTType = Union[datetime, "NaTType"]
  272. DateTimeErrorChoices = Union[IgnoreRaise, Literal["coerce"]]
  273. # sort_index
  274. SortKind = Literal["quicksort", "mergesort", "heapsort", "stable"]
  275. NaPosition = Literal["first", "last"]
  276. # quantile interpolation
  277. QuantileInterpolation = Literal["linear", "lower", "higher", "midpoint", "nearest"]
  278. # plotting
  279. PlottingOrientation = Literal["horizontal", "vertical"]
  280. # dropna
  281. AnyAll = Literal["any", "all"]
  282. # merge
  283. MergeHow = Literal["left", "right", "inner", "outer", "cross"]
  284. # join
  285. JoinHow = Literal["left", "right", "inner", "outer"]
  286. MatplotlibColor = Union[str, Sequence[float]]
  287. TimeGrouperOrigin = Union[
  288. "Timestamp", Literal["epoch", "start", "start_day", "end", "end_day"]
  289. ]
  290. TimeAmbiguous = Union[Literal["infer", "NaT", "raise"], "npt.NDArray[np.bool_]"]
  291. TimeNonexistent = Union[
  292. Literal["shift_forward", "shift_backward", "NaT", "raise"], timedelta
  293. ]
  294. DropKeep = Literal["first", "last", False]
  295. CorrelationMethod = Union[
  296. Literal["pearson", "kendall", "spearman"], Callable[[np.ndarray, np.ndarray], float]
  297. ]
  298. AlignJoin = Literal["outer", "inner", "left", "right"]
  299. DtypeBackend = Literal["pyarrow", "numpy_nullable"]