from __future__ import annotations

__docformat__ = "restructuredtext"

# Let users know if they're missing any of our hard dependencies
_hard_dependencies = ("numpy", "pytz", "dateutil")
_missing_dependencies = []

for _dependency in _hard_dependencies:
    try:
        __import__(_dependency)
    except ImportError as _e:  # pragma: no cover
        # Collect every missing dependency so the user sees them all at once
        # instead of fixing them one ImportError at a time.
        _missing_dependencies.append(f"{_dependency}: {_e}")

if _missing_dependencies:  # pragma: no cover
    raise ImportError(
        "Unable to import required dependencies:\n" + "\n".join(_missing_dependencies)
    )
# These helpers are not part of the public namespace; drop them.
del _hard_dependencies, _dependency, _missing_dependencies
  16. # numpy compat
  17. from pandas.compat import is_numpy_dev as _is_numpy_dev # pyright: ignore # noqa:F401
  18. try:
  19. from pandas._libs import hashtable as _hashtable, lib as _lib, tslib as _tslib
  20. except ImportError as _err: # pragma: no cover
  21. _module = _err.name
  22. raise ImportError(
  23. f"C extension: {_module} not built. If you want to import "
  24. "pandas from the source directory, you may need to run "
  25. "'python setup.py build_ext --force' to build the C extensions first."
  26. ) from _err
  27. else:
  28. del _tslib, _lib, _hashtable
  29. from pandas._config import (
  30. get_option,
  31. set_option,
  32. reset_option,
  33. describe_option,
  34. option_context,
  35. options,
  36. )
  37. # let init-time option registration happen
  38. import pandas.core.config_init # pyright: ignore # noqa:F401
  39. from pandas.core.api import (
  40. # dtype
  41. ArrowDtype,
  42. Int8Dtype,
  43. Int16Dtype,
  44. Int32Dtype,
  45. Int64Dtype,
  46. UInt8Dtype,
  47. UInt16Dtype,
  48. UInt32Dtype,
  49. UInt64Dtype,
  50. Float32Dtype,
  51. Float64Dtype,
  52. CategoricalDtype,
  53. PeriodDtype,
  54. IntervalDtype,
  55. DatetimeTZDtype,
  56. StringDtype,
  57. BooleanDtype,
  58. # missing
  59. NA,
  60. isna,
  61. isnull,
  62. notna,
  63. notnull,
  64. # indexes
  65. Index,
  66. CategoricalIndex,
  67. RangeIndex,
  68. MultiIndex,
  69. IntervalIndex,
  70. TimedeltaIndex,
  71. DatetimeIndex,
  72. PeriodIndex,
  73. IndexSlice,
  74. # tseries
  75. NaT,
  76. Period,
  77. period_range,
  78. Timedelta,
  79. timedelta_range,
  80. Timestamp,
  81. date_range,
  82. bdate_range,
  83. Interval,
  84. interval_range,
  85. DateOffset,
  86. # conversion
  87. to_numeric,
  88. to_datetime,
  89. to_timedelta,
  90. # misc
  91. Flags,
  92. Grouper,
  93. factorize,
  94. unique,
  95. value_counts,
  96. NamedAgg,
  97. array,
  98. Categorical,
  99. set_eng_float_format,
  100. Series,
  101. DataFrame,
  102. )
  103. from pandas.core.arrays.sparse import SparseDtype
  104. from pandas.tseries.api import infer_freq
  105. from pandas.tseries import offsets
  106. from pandas.core.computation.api import eval
  107. from pandas.core.reshape.api import (
  108. concat,
  109. lreshape,
  110. melt,
  111. wide_to_long,
  112. merge,
  113. merge_asof,
  114. merge_ordered,
  115. crosstab,
  116. pivot,
  117. pivot_table,
  118. get_dummies,
  119. from_dummies,
  120. cut,
  121. qcut,
  122. )
  123. from pandas import api, arrays, errors, io, plotting, tseries
  124. from pandas import testing
  125. from pandas.util._print_versions import show_versions
  126. from pandas.io.api import (
  127. # excel
  128. ExcelFile,
  129. ExcelWriter,
  130. read_excel,
  131. # parsers
  132. read_csv,
  133. read_fwf,
  134. read_table,
  135. # pickle
  136. read_pickle,
  137. to_pickle,
  138. # pytables
  139. HDFStore,
  140. read_hdf,
  141. # sql
  142. read_sql,
  143. read_sql_query,
  144. read_sql_table,
  145. # misc
  146. read_clipboard,
  147. read_parquet,
  148. read_orc,
  149. read_feather,
  150. read_gbq,
  151. read_html,
  152. read_xml,
  153. read_json,
  154. read_stata,
  155. read_sas,
  156. read_spss,
  157. )
  158. from pandas.io.json._normalize import json_normalize
  159. from pandas.util._tester import test
  160. # use the closest tagged version if possible
  161. from pandas._version import get_versions
  162. v = get_versions()
  163. __version__ = v.get("closest-tag", v["version"])
  164. __git_version__ = v.get("full-revisionid")
  165. del get_versions, v
  166. # module level doc-string
  167. __doc__ = """
  168. pandas - a powerful data analysis and manipulation library for Python
  169. =====================================================================
  170. **pandas** is a Python package providing fast, flexible, and expressive data
  171. structures designed to make working with "relational" or "labeled" data both
  172. easy and intuitive. It aims to be the fundamental high-level building block for
  173. doing practical, **real world** data analysis in Python. Additionally, it has
  174. the broader goal of becoming **the most powerful and flexible open source data
  175. analysis / manipulation tool available in any language**. It is already well on
  176. its way toward this goal.
  177. Main Features
  178. -------------
  179. Here are just a few of the things that pandas does well:
  180. - Easy handling of missing data in floating point as well as non-floating
  181. point data.
  182. - Size mutability: columns can be inserted and deleted from DataFrame and
  183. higher dimensional objects
  184. - Automatic and explicit data alignment: objects can be explicitly aligned
  185. to a set of labels, or the user can simply ignore the labels and let
  186. `Series`, `DataFrame`, etc. automatically align the data for you in
  187. computations.
  188. - Powerful, flexible group by functionality to perform split-apply-combine
  189. operations on data sets, for both aggregating and transforming data.
  190. - Make it easy to convert ragged, differently-indexed data in other Python
  191. and NumPy data structures into DataFrame objects.
  192. - Intelligent label-based slicing, fancy indexing, and subsetting of large
  193. data sets.
  194. - Intuitive merging and joining data sets.
  195. - Flexible reshaping and pivoting of data sets.
  196. - Hierarchical labeling of axes (possible to have multiple labels per tick).
  197. - Robust IO tools for loading data from flat files (CSV and delimited),
  198. Excel files, databases, and saving/loading data from the ultrafast HDF5
  199. format.
  200. - Time series-specific functionality: date range generation and frequency
  201. conversion, moving window statistics, date shifting and lagging.
  202. """
  203. # Use __all__ to let type checkers know what is part of the public API.
  204. # Pandas is not (yet) a py.typed library: the public API is determined
  205. # based on the documentation.
  206. __all__ = [
  207. "ArrowDtype",
  208. "BooleanDtype",
  209. "Categorical",
  210. "CategoricalDtype",
  211. "CategoricalIndex",
  212. "DataFrame",
  213. "DateOffset",
  214. "DatetimeIndex",
  215. "DatetimeTZDtype",
  216. "ExcelFile",
  217. "ExcelWriter",
  218. "Flags",
  219. "Float32Dtype",
  220. "Float64Dtype",
  221. "Grouper",
  222. "HDFStore",
  223. "Index",
  224. "IndexSlice",
  225. "Int16Dtype",
  226. "Int32Dtype",
  227. "Int64Dtype",
  228. "Int8Dtype",
  229. "Interval",
  230. "IntervalDtype",
  231. "IntervalIndex",
  232. "MultiIndex",
  233. "NA",
  234. "NaT",
  235. "NamedAgg",
  236. "Period",
  237. "PeriodDtype",
  238. "PeriodIndex",
  239. "RangeIndex",
  240. "Series",
  241. "SparseDtype",
  242. "StringDtype",
  243. "Timedelta",
  244. "TimedeltaIndex",
  245. "Timestamp",
  246. "UInt16Dtype",
  247. "UInt32Dtype",
  248. "UInt64Dtype",
  249. "UInt8Dtype",
  250. "api",
  251. "array",
  252. "arrays",
  253. "bdate_range",
  254. "concat",
  255. "crosstab",
  256. "cut",
  257. "date_range",
  258. "describe_option",
  259. "errors",
  260. "eval",
  261. "factorize",
  262. "get_dummies",
  263. "from_dummies",
  264. "get_option",
  265. "infer_freq",
  266. "interval_range",
  267. "io",
  268. "isna",
  269. "isnull",
  270. "json_normalize",
  271. "lreshape",
  272. "melt",
  273. "merge",
  274. "merge_asof",
  275. "merge_ordered",
  276. "notna",
  277. "notnull",
  278. "offsets",
  279. "option_context",
  280. "options",
  281. "period_range",
  282. "pivot",
  283. "pivot_table",
  284. "plotting",
  285. "qcut",
  286. "read_clipboard",
  287. "read_csv",
  288. "read_excel",
  289. "read_feather",
  290. "read_fwf",
  291. "read_gbq",
  292. "read_hdf",
  293. "read_html",
  294. "read_json",
  295. "read_orc",
  296. "read_parquet",
  297. "read_pickle",
  298. "read_sas",
  299. "read_spss",
  300. "read_sql",
  301. "read_sql_query",
  302. "read_sql_table",
  303. "read_stata",
  304. "read_table",
  305. "read_xml",
  306. "reset_option",
  307. "set_eng_float_format",
  308. "set_option",
  309. "show_versions",
  310. "test",
  311. "testing",
  312. "timedelta_range",
  313. "to_datetime",
  314. "to_numeric",
  315. "to_pickle",
  316. "to_timedelta",
  317. "tseries",
  318. "unique",
  319. "value_counts",
  320. "wide_to_long",
  321. ]