__init__.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637
  1. """
  2. Expose public exceptions & warnings
  3. """
  4. from __future__ import annotations
  5. import ctypes
  6. from pandas._config.config import OptionError
  7. from pandas._libs.tslibs import (
  8. OutOfBoundsDatetime,
  9. OutOfBoundsTimedelta,
  10. )
  11. from pandas.util.version import InvalidVersion
  12. class IntCastingNaNError(ValueError):
  13. """
  14. Exception raised when converting (``astype``) an array with NaN to an integer type.
  15. """
  16. class NullFrequencyError(ValueError):
  17. """
  18. Exception raised when a ``freq`` cannot be null.
  19. Particularly ``DatetimeIndex.shift``, ``TimedeltaIndex.shift``,
  20. ``PeriodIndex.shift``.
  21. """
  22. class PerformanceWarning(Warning):
  23. """
  24. Warning raised when there is a possible performance impact.
  25. """
  26. class UnsupportedFunctionCall(ValueError):
  27. """
  28. Exception raised when attempting to call a unsupported numpy function.
  29. For example, ``np.cumsum(groupby_object)``.
  30. """
  31. class UnsortedIndexError(KeyError):
  32. """
  33. Error raised when slicing a MultiIndex which has not been lexsorted.
  34. Subclass of `KeyError`.
  35. """
  36. class ParserError(ValueError):
  37. """
  38. Exception that is raised by an error encountered in parsing file contents.
  39. This is a generic error raised for errors encountered when functions like
  40. `read_csv` or `read_html` are parsing contents of a file.
  41. See Also
  42. --------
  43. read_csv : Read CSV (comma-separated) file into a DataFrame.
  44. read_html : Read HTML table into a DataFrame.
  45. """
  46. class DtypeWarning(Warning):
  47. """
  48. Warning raised when reading different dtypes in a column from a file.
  49. Raised for a dtype incompatibility. This can happen whenever `read_csv`
  50. or `read_table` encounter non-uniform dtypes in a column(s) of a given
  51. CSV file.
  52. See Also
  53. --------
  54. read_csv : Read CSV (comma-separated) file into a DataFrame.
  55. read_table : Read general delimited file into a DataFrame.
  56. Notes
  57. -----
  58. This warning is issued when dealing with larger files because the dtype
  59. checking happens per chunk read.
  60. Despite the warning, the CSV file is read with mixed types in a single
  61. column which will be an object type. See the examples below to better
  62. understand this issue.
  63. Examples
  64. --------
  65. This example creates and reads a large CSV file with a column that contains
  66. `int` and `str`.
  67. >>> df = pd.DataFrame({'a': (['1'] * 100000 + ['X'] * 100000 +
  68. ... ['1'] * 100000),
  69. ... 'b': ['b'] * 300000}) # doctest: +SKIP
  70. >>> df.to_csv('test.csv', index=False) # doctest: +SKIP
  71. >>> df2 = pd.read_csv('test.csv') # doctest: +SKIP
  72. ... # DtypeWarning: Columns (0) have mixed types
  73. Important to notice that ``df2`` will contain both `str` and `int` for the
  74. same input, '1'.
  75. >>> df2.iloc[262140, 0] # doctest: +SKIP
  76. '1'
  77. >>> type(df2.iloc[262140, 0]) # doctest: +SKIP
  78. <class 'str'>
  79. >>> df2.iloc[262150, 0] # doctest: +SKIP
  80. 1
  81. >>> type(df2.iloc[262150, 0]) # doctest: +SKIP
  82. <class 'int'>
  83. One way to solve this issue is using the `dtype` parameter in the
  84. `read_csv` and `read_table` functions to explicit the conversion:
  85. >>> df2 = pd.read_csv('test.csv', sep=',', dtype={'a': str}) # doctest: +SKIP
  86. No warning was issued.
  87. """
  88. class EmptyDataError(ValueError):
  89. """
  90. Exception raised in ``pd.read_csv`` when empty data or header is encountered.
  91. """
  92. class ParserWarning(Warning):
  93. """
  94. Warning raised when reading a file that doesn't use the default 'c' parser.
  95. Raised by `pd.read_csv` and `pd.read_table` when it is necessary to change
  96. parsers, generally from the default 'c' parser to 'python'.
  97. It happens due to a lack of support or functionality for parsing a
  98. particular attribute of a CSV file with the requested engine.
  99. Currently, 'c' unsupported options include the following parameters:
  100. 1. `sep` other than a single character (e.g. regex separators)
  101. 2. `skipfooter` higher than 0
  102. 3. `sep=None` with `delim_whitespace=False`
  103. The warning can be avoided by adding `engine='python'` as a parameter in
  104. `pd.read_csv` and `pd.read_table` methods.
  105. See Also
  106. --------
  107. pd.read_csv : Read CSV (comma-separated) file into DataFrame.
  108. pd.read_table : Read general delimited file into DataFrame.
  109. Examples
  110. --------
  111. Using a `sep` in `pd.read_csv` other than a single character:
  112. >>> import io
  113. >>> csv = '''a;b;c
  114. ... 1;1,8
  115. ... 1;2,1'''
  116. >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]') # doctest: +SKIP
  117. ... # ParserWarning: Falling back to the 'python' engine...
  118. Adding `engine='python'` to `pd.read_csv` removes the Warning:
  119. >>> df = pd.read_csv(io.StringIO(csv), sep='[;,]', engine='python')
  120. """
  121. class MergeError(ValueError):
  122. """
  123. Exception raised when merging data.
  124. Subclass of ``ValueError``.
  125. """
  126. class AccessorRegistrationWarning(Warning):
  127. """
  128. Warning for attribute conflicts in accessor registration.
  129. """
  130. class AbstractMethodError(NotImplementedError):
  131. """
  132. Raise this error instead of NotImplementedError for abstract methods.
  133. """
  134. def __init__(self, class_instance, methodtype: str = "method") -> None:
  135. types = {"method", "classmethod", "staticmethod", "property"}
  136. if methodtype not in types:
  137. raise ValueError(
  138. f"methodtype must be one of {methodtype}, got {types} instead."
  139. )
  140. self.methodtype = methodtype
  141. self.class_instance = class_instance
  142. def __str__(self) -> str:
  143. if self.methodtype == "classmethod":
  144. name = self.class_instance.__name__
  145. else:
  146. name = type(self.class_instance).__name__
  147. return f"This {self.methodtype} must be defined in the concrete class {name}"
  148. class NumbaUtilError(Exception):
  149. """
  150. Error raised for unsupported Numba engine routines.
  151. """
  152. class DuplicateLabelError(ValueError):
  153. """
  154. Error raised when an operation would introduce duplicate labels.
  155. .. versionadded:: 1.2.0
  156. Examples
  157. --------
  158. >>> s = pd.Series([0, 1, 2], index=['a', 'b', 'c']).set_flags(
  159. ... allows_duplicate_labels=False
  160. ... )
  161. >>> s.reindex(['a', 'a', 'b'])
  162. Traceback (most recent call last):
  163. ...
  164. DuplicateLabelError: Index has duplicates.
  165. positions
  166. label
  167. a [0, 1]
  168. """
  169. class InvalidIndexError(Exception):
  170. """
  171. Exception raised when attempting to use an invalid index key.
  172. .. versionadded:: 1.1.0
  173. """
  174. class DataError(Exception):
  175. """
  176. Exceptionn raised when performing an operation on non-numerical data.
  177. For example, calling ``ohlc`` on a non-numerical column or a function
  178. on a rolling window.
  179. """
  180. class SpecificationError(Exception):
  181. """
  182. Exception raised by ``agg`` when the functions are ill-specified.
  183. The exception raised in two scenarios.
  184. The first way is calling ``agg`` on a
  185. Dataframe or Series using a nested renamer (dict-of-dict).
  186. The second way is calling ``agg`` on a Dataframe with duplicated functions
  187. names without assigning column name.
  188. Examples
  189. --------
  190. >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2],
  191. ... 'B': range(5),
  192. ... 'C': range(5)})
  193. >>> df.groupby('A').B.agg({'foo': 'count'}) # doctest: +SKIP
  194. ... # SpecificationError: nested renamer is not supported
  195. >>> df.groupby('A').agg({'B': {'foo': ['sum', 'max']}}) # doctest: +SKIP
  196. ... # SpecificationError: nested renamer is not supported
  197. >>> df.groupby('A').agg(['min', 'min']) # doctest: +SKIP
  198. ... # SpecificationError: nested renamer is not supported
  199. """
  200. class SettingWithCopyError(ValueError):
  201. """
  202. Exception raised when trying to set on a copied slice from a ``DataFrame``.
  203. The ``mode.chained_assignment`` needs to be set to set to 'raise.' This can
  204. happen unintentionally when chained indexing.
  205. For more information on evaluation order,
  206. see :ref:`the user guide<indexing.evaluation_order>`.
  207. For more information on view vs. copy,
  208. see :ref:`the user guide<indexing.view_versus_copy>`.
  209. Examples
  210. --------
  211. >>> pd.options.mode.chained_assignment = 'raise'
  212. >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
  213. >>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP
  214. ... # SettingWithCopyError: A value is trying to be set on a copy of a...
  215. """
  216. class SettingWithCopyWarning(Warning):
  217. """
  218. Warning raised when trying to set on a copied slice from a ``DataFrame``.
  219. The ``mode.chained_assignment`` needs to be set to set to 'warn.'
  220. 'Warn' is the default option. This can happen unintentionally when
  221. chained indexing.
  222. For more information on evaluation order,
  223. see :ref:`the user guide<indexing.evaluation_order>`.
  224. For more information on view vs. copy,
  225. see :ref:`the user guide<indexing.view_versus_copy>`.
  226. Examples
  227. --------
  228. >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
  229. >>> df.loc[0:3]['A'] = 'a' # doctest: +SKIP
  230. ... # SettingWithCopyWarning: A value is trying to be set on a copy of a...
  231. """
  232. class ChainedAssignmentError(Warning):
  233. """
  234. Warning raised when trying to set using chained assignment.
  235. When the ``mode.copy_on_write`` option is enabled, chained assignment can
  236. never work. In such a situation, we are always setting into a temporary
  237. object that is the result of an indexing operation (getitem), which under
  238. Copy-on-Write always behaves as a copy. Thus, assigning through a chain
  239. can never update the original Series or DataFrame.
  240. For more information on view vs. copy,
  241. see :ref:`the user guide<indexing.view_versus_copy>`.
  242. Examples
  243. --------
  244. >>> pd.options.mode.copy_on_write = True
  245. >>> df = pd.DataFrame({'A': [1, 1, 1, 2, 2]}, columns=['A'])
  246. >>> df["A"][0:3] = 10 # doctest: +SKIP
  247. ... # ChainedAssignmentError: ...
  248. >>> pd.options.mode.copy_on_write = False
  249. """
  250. _chained_assignment_msg = (
  251. "A value is trying to be set on a copy of a DataFrame or Series "
  252. "through chained assignment.\n"
  253. "When using the Copy-on-Write mode, such chained assignment never works "
  254. "to update the original DataFrame or Series, because the intermediate "
  255. "object on which we are setting values always behaves as a copy.\n\n"
  256. "Try using '.loc[row_indexer, col_indexer] = value' instead, to perform "
  257. "the assignment in a single step.\n\n"
  258. "See the caveats in the documentation: "
  259. "https://pandas.pydata.org/pandas-docs/stable/user_guide/"
  260. "indexing.html#returning-a-view-versus-a-copy"
  261. )
  262. class NumExprClobberingError(NameError):
  263. """
  264. Exception raised when trying to use a built-in numexpr name as a variable name.
  265. ``eval`` or ``query`` will throw the error if the engine is set
  266. to 'numexpr'. 'numexpr' is the default engine value for these methods if the
  267. numexpr package is installed.
  268. Examples
  269. --------
  270. >>> df = pd.DataFrame({'abs': [1, 1, 1]})
  271. >>> df.query("abs > 2") # doctest: +SKIP
  272. ... # NumExprClobberingError: Variables in expression "(abs) > (2)" overlap...
  273. >>> sin, a = 1, 2
  274. >>> pd.eval("sin + a", engine='numexpr') # doctest: +SKIP
  275. ... # NumExprClobberingError: Variables in expression "(sin) + (a)" overlap...
  276. """
  277. class UndefinedVariableError(NameError):
  278. """
  279. Exception raised by ``query`` or ``eval`` when using an undefined variable name.
  280. It will also specify whether the undefined variable is local or not.
  281. Examples
  282. --------
  283. >>> df = pd.DataFrame({'A': [1, 1, 1]})
  284. >>> df.query("A > x") # doctest: +SKIP
  285. ... # UndefinedVariableError: name 'x' is not defined
  286. >>> df.query("A > @y") # doctest: +SKIP
  287. ... # UndefinedVariableError: local variable 'y' is not defined
  288. >>> pd.eval('x + 1') # doctest: +SKIP
  289. ... # UndefinedVariableError: name 'x' is not defined
  290. """
  291. def __init__(self, name: str, is_local: bool | None = None) -> None:
  292. base_msg = f"{repr(name)} is not defined"
  293. if is_local:
  294. msg = f"local variable {base_msg}"
  295. else:
  296. msg = f"name {base_msg}"
  297. super().__init__(msg)
  298. class IndexingError(Exception):
  299. """
  300. Exception is raised when trying to index and there is a mismatch in dimensions.
  301. Examples
  302. --------
  303. >>> df = pd.DataFrame({'A': [1, 1, 1]})
  304. >>> df.loc[..., ..., 'A'] # doctest: +SKIP
  305. ... # IndexingError: indexer may only contain one '...' entry
  306. >>> df = pd.DataFrame({'A': [1, 1, 1]})
  307. >>> df.loc[1, ..., ...] # doctest: +SKIP
  308. ... # IndexingError: Too many indexers
  309. >>> df[pd.Series([True], dtype=bool)] # doctest: +SKIP
  310. ... # IndexingError: Unalignable boolean Series provided as indexer...
  311. >>> s = pd.Series(range(2),
  312. ... index = pd.MultiIndex.from_product([["a", "b"], ["c"]]))
  313. >>> s.loc["a", "c", "d"] # doctest: +SKIP
  314. ... # IndexingError: Too many indexers
  315. """
  316. class PyperclipException(RuntimeError):
  317. """
  318. Exception raised when clipboard functionality is unsupported.
  319. Raised by ``to_clipboard()`` and ``read_clipboard()``.
  320. """
  321. class PyperclipWindowsException(PyperclipException):
  322. """
  323. Exception raised when clipboard functionality is unsupported by Windows.
  324. Access to the clipboard handle would be denied due to some other
  325. window process is accessing it.
  326. """
  327. def __init__(self, message: str) -> None:
  328. # attr only exists on Windows, so typing fails on other platforms
  329. message += f" ({ctypes.WinError()})" # type: ignore[attr-defined]
  330. super().__init__(message)
  331. class CSSWarning(UserWarning):
  332. """
  333. Warning is raised when converting css styling fails.
  334. This can be due to the styling not having an equivalent value or because the
  335. styling isn't properly formatted.
  336. Examples
  337. --------
  338. >>> df = pd.DataFrame({'A': [1, 1, 1]})
  339. >>> (df.style.applymap(lambda x: 'background-color: blueGreenRed;')
  340. ... .to_excel('styled.xlsx')) # doctest: +SKIP
  341. ... # CSSWarning: Unhandled color format: 'blueGreenRed'
  342. >>> (df.style.applymap(lambda x: 'border: 1px solid red red;')
  343. ... .to_excel('styled.xlsx')) # doctest: +SKIP
  344. ... # CSSWarning: Too many tokens provided to "border" (expected 1-3)
  345. """
  346. class PossibleDataLossError(Exception):
  347. """
  348. Exception raised when trying to open a HDFStore file when already opened.
  349. Examples
  350. --------
  351. >>> store = pd.HDFStore('my-store', 'a') # doctest: +SKIP
  352. >>> store.open("w") # doctest: +SKIP
  353. ... # PossibleDataLossError: Re-opening the file [my-store] with mode [a]...
  354. """
  355. class ClosedFileError(Exception):
  356. """
  357. Exception is raised when trying to perform an operation on a closed HDFStore file.
  358. Examples
  359. --------
  360. >>> store = pd.HDFStore('my-store', 'a') # doctest: +SKIP
  361. >>> store.close() # doctest: +SKIP
  362. >>> store.keys() # doctest: +SKIP
  363. ... # ClosedFileError: my-store file is not open!
  364. """
  365. class IncompatibilityWarning(Warning):
  366. """
  367. Warning raised when trying to use where criteria on an incompatible HDF5 file.
  368. """
  369. class AttributeConflictWarning(Warning):
  370. """
  371. Warning raised when index attributes conflict when using HDFStore.
  372. Occurs when attempting to append an index with a different
  373. name than the existing index on an HDFStore or attempting to append an index with a
  374. different frequency than the existing index on an HDFStore.
  375. """
  376. class DatabaseError(OSError):
  377. """
  378. Error is raised when executing sql with bad syntax or sql that throws an error.
  379. Examples
  380. --------
  381. >>> from sqlite3 import connect
  382. >>> conn = connect(':memory:')
  383. >>> pd.read_sql('select * test', conn) # doctest: +SKIP
  384. ... # DatabaseError: Execution failed on sql 'test': near "test": syntax error
  385. """
  386. class PossiblePrecisionLoss(Warning):
  387. """
  388. Warning raised by to_stata on a column with a value outside or equal to int64.
  389. When the column value is outside or equal to the int64 value the column is
  390. converted to a float64 dtype.
  391. Examples
  392. --------
  393. >>> df = pd.DataFrame({"s": pd.Series([1, 2**53], dtype=np.int64)})
  394. >>> df.to_stata('test') # doctest: +SKIP
  395. ... # PossiblePrecisionLoss: Column converted from int64 to float64...
  396. """
  397. class ValueLabelTypeMismatch(Warning):
  398. """
  399. Warning raised by to_stata on a category column that contains non-string values.
  400. Examples
  401. --------
  402. >>> df = pd.DataFrame({"categories": pd.Series(["a", 2], dtype="category")})
  403. >>> df.to_stata('test') # doctest: +SKIP
  404. ... # ValueLabelTypeMismatch: Stata value labels (pandas categories) must be str...
  405. """
  406. class InvalidColumnName(Warning):
  407. """
  408. Warning raised by to_stata the column contains a non-valid stata name.
  409. Because the column name is an invalid Stata variable, the name needs to be
  410. converted.
  411. Examples
  412. --------
  413. >>> df = pd.DataFrame({"0categories": pd.Series([2, 2])})
  414. >>> df.to_stata('test') # doctest: +SKIP
  415. ... # InvalidColumnName: Not all pandas column names were valid Stata variable...
  416. """
  417. class CategoricalConversionWarning(Warning):
  418. """
  419. Warning is raised when reading a partial labeled Stata file using a iterator.
  420. Examples
  421. --------
  422. >>> from pandas.io.stata import StataReader
  423. >>> with StataReader('dta_file', chunksize=2) as reader: # doctest: +SKIP
  424. ... for i, block in enumerate(reader):
  425. ... print(i, block)
  426. ... # CategoricalConversionWarning: One or more series with value labels...
  427. """
  428. class LossySetitemError(Exception):
  429. """
  430. Raised when trying to do a __setitem__ on an np.ndarray that is not lossless.
  431. """
  432. class NoBufferPresent(Exception):
  433. """
  434. Exception is raised in _get_data_buffer to signal that there is no requested buffer.
  435. """
  436. class InvalidComparison(Exception):
  437. """
  438. Exception is raised by _validate_comparison_value to indicate an invalid comparison.
  439. """
  440. __all__ = [
  441. "AbstractMethodError",
  442. "AccessorRegistrationWarning",
  443. "AttributeConflictWarning",
  444. "CategoricalConversionWarning",
  445. "ClosedFileError",
  446. "CSSWarning",
  447. "DatabaseError",
  448. "DataError",
  449. "DtypeWarning",
  450. "DuplicateLabelError",
  451. "EmptyDataError",
  452. "IncompatibilityWarning",
  453. "IntCastingNaNError",
  454. "InvalidColumnName",
  455. "InvalidComparison",
  456. "InvalidIndexError",
  457. "InvalidVersion",
  458. "IndexingError",
  459. "LossySetitemError",
  460. "MergeError",
  461. "NoBufferPresent",
  462. "NullFrequencyError",
  463. "NumbaUtilError",
  464. "NumExprClobberingError",
  465. "OptionError",
  466. "OutOfBoundsDatetime",
  467. "OutOfBoundsTimedelta",
  468. "ParserError",
  469. "ParserWarning",
  470. "PerformanceWarning",
  471. "PossibleDataLossError",
  472. "PossiblePrecisionLoss",
  473. "PyperclipException",
  474. "PyperclipWindowsException",
  475. "SettingWithCopyError",
  476. "SettingWithCopyWarning",
  477. "SpecificationError",
  478. "UndefinedVariableError",
  479. "UnsortedIndexError",
  480. "UnsupportedFunctionCall",
  481. "ValueLabelTypeMismatch",
  482. ]