conftest.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297
  1. from __future__ import annotations
  2. import os
  3. import pytest
  4. from pandas.compat._optional import VERSIONS
  5. from pandas import (
  6. read_csv,
  7. read_table,
  8. )
  9. import pandas._testing as tm
  10. class BaseParser:
  11. engine: str | None = None
  12. low_memory = True
  13. float_precision_choices: list[str | None] = []
  14. def update_kwargs(self, kwargs):
  15. kwargs = kwargs.copy()
  16. kwargs.update({"engine": self.engine, "low_memory": self.low_memory})
  17. return kwargs
  18. def read_csv(self, *args, **kwargs):
  19. kwargs = self.update_kwargs(kwargs)
  20. return read_csv(*args, **kwargs)
  21. def read_csv_check_warnings(
  22. self, warn_type: type[Warning], warn_msg: str, *args, **kwargs
  23. ):
  24. # We need to check the stacklevel here instead of in the tests
  25. # since this is where read_csv is called and where the warning
  26. # should point to.
  27. kwargs = self.update_kwargs(kwargs)
  28. with tm.assert_produces_warning(warn_type, match=warn_msg):
  29. return read_csv(*args, **kwargs)
  30. def read_table(self, *args, **kwargs):
  31. kwargs = self.update_kwargs(kwargs)
  32. return read_table(*args, **kwargs)
  33. def read_table_check_warnings(
  34. self, warn_type: type[Warning], warn_msg: str, *args, **kwargs
  35. ):
  36. # We need to check the stacklevel here instead of in the tests
  37. # since this is where read_table is called and where the warning
  38. # should point to.
  39. kwargs = self.update_kwargs(kwargs)
  40. with tm.assert_produces_warning(warn_type, match=warn_msg):
  41. return read_table(*args, **kwargs)
  42. class CParser(BaseParser):
  43. engine = "c"
  44. float_precision_choices = [None, "high", "round_trip"]
  45. class CParserHighMemory(CParser):
  46. low_memory = False
  47. class CParserLowMemory(CParser):
  48. low_memory = True
  49. class PythonParser(BaseParser):
  50. engine = "python"
  51. float_precision_choices = [None]
  52. class PyArrowParser(BaseParser):
  53. engine = "pyarrow"
  54. float_precision_choices = [None]
  55. @pytest.fixture
  56. def csv_dir_path(datapath):
  57. """
  58. The directory path to the data files needed for parser tests.
  59. """
  60. return datapath("io", "parser", "data")
  61. @pytest.fixture
  62. def csv1(datapath):
  63. """
  64. The path to the data file "test1.csv" needed for parser tests.
  65. """
  66. return os.path.join(datapath("io", "data", "csv"), "test1.csv")
  67. _cParserHighMemory = CParserHighMemory
  68. _cParserLowMemory = CParserLowMemory
  69. _pythonParser = PythonParser
  70. _pyarrowParser = PyArrowParser
  71. _py_parsers_only = [_pythonParser]
  72. _c_parsers_only = [_cParserHighMemory, _cParserLowMemory]
  73. _pyarrow_parsers_only = [pytest.param(_pyarrowParser, marks=pytest.mark.single_cpu)]
  74. _all_parsers = [*_c_parsers_only, *_py_parsers_only, *_pyarrow_parsers_only]
  75. _py_parser_ids = ["python"]
  76. _c_parser_ids = ["c_high", "c_low"]
  77. _pyarrow_parsers_ids = ["pyarrow"]
  78. _all_parser_ids = [*_c_parser_ids, *_py_parser_ids, *_pyarrow_parsers_ids]
  79. @pytest.fixture(params=_all_parsers, ids=_all_parser_ids)
  80. def all_parsers(request):
  81. """
  82. Fixture all of the CSV parsers.
  83. """
  84. parser = request.param()
  85. if parser.engine == "pyarrow":
  86. pytest.importorskip("pyarrow", VERSIONS["pyarrow"])
  87. # Try finding a way to disable threads all together
  88. # for more stable CI runs
  89. import pyarrow
  90. pyarrow.set_cpu_count(1)
  91. return parser
  92. @pytest.fixture(params=_c_parsers_only, ids=_c_parser_ids)
  93. def c_parser_only(request):
  94. """
  95. Fixture all of the CSV parsers using the C engine.
  96. """
  97. return request.param()
  98. @pytest.fixture(params=_py_parsers_only, ids=_py_parser_ids)
  99. def python_parser_only(request):
  100. """
  101. Fixture all of the CSV parsers using the Python engine.
  102. """
  103. return request.param()
  104. @pytest.fixture(params=_pyarrow_parsers_only, ids=_pyarrow_parsers_ids)
  105. def pyarrow_parser_only(request):
  106. """
  107. Fixture all of the CSV parsers using the Pyarrow engine.
  108. """
  109. return request.param()
  110. def _get_all_parser_float_precision_combinations():
  111. """
  112. Return all allowable parser and float precision
  113. combinations and corresponding ids.
  114. """
  115. params = []
  116. ids = []
  117. for parser, parser_id in zip(_all_parsers, _all_parser_ids):
  118. if hasattr(parser, "values"):
  119. # Wrapped in pytest.param, get the actual parser back
  120. parser = parser.values[0]
  121. for precision in parser.float_precision_choices:
  122. # Re-wrap in pytest.param for pyarrow
  123. mark = pytest.mark.single_cpu if parser.engine == "pyarrow" else ()
  124. param = pytest.param((parser(), precision), marks=mark)
  125. params.append(param)
  126. ids.append(f"{parser_id}-{precision}")
  127. return {"params": params, "ids": ids}
  128. @pytest.fixture(
  129. params=_get_all_parser_float_precision_combinations()["params"],
  130. ids=_get_all_parser_float_precision_combinations()["ids"],
  131. )
  132. def all_parsers_all_precisions(request):
  133. """
  134. Fixture for all allowable combinations of parser
  135. and float precision
  136. """
  137. return request.param
  138. _utf_values = [8, 16, 32]
  139. _encoding_seps = ["", "-", "_"]
  140. _encoding_prefixes = ["utf", "UTF"]
  141. _encoding_fmts = [
  142. f"{prefix}{sep}" + "{0}" for sep in _encoding_seps for prefix in _encoding_prefixes
  143. ]
  144. @pytest.fixture(params=_utf_values)
  145. def utf_value(request):
  146. """
  147. Fixture for all possible integer values for a UTF encoding.
  148. """
  149. return request.param
  150. @pytest.fixture(params=_encoding_fmts)
  151. def encoding_fmt(request):
  152. """
  153. Fixture for all possible string formats of a UTF encoding.
  154. """
  155. return request.param
  156. @pytest.fixture(
  157. params=[
  158. ("-1,0", -1.0),
  159. ("-1,2e0", -1.2),
  160. ("-1e0", -1.0),
  161. ("+1e0", 1.0),
  162. ("+1e+0", 1.0),
  163. ("+1e-1", 0.1),
  164. ("+,1e1", 1.0),
  165. ("+1,e0", 1.0),
  166. ("-,1e1", -1.0),
  167. ("-1,e0", -1.0),
  168. ("0,1", 0.1),
  169. ("1,", 1.0),
  170. (",1", 0.1),
  171. ("-,1", -0.1),
  172. ("1_,", 1.0),
  173. ("1_234,56", 1234.56),
  174. ("1_234,56e0", 1234.56),
  175. # negative cases; must not parse as float
  176. ("_", "_"),
  177. ("-_", "-_"),
  178. ("-_1", "-_1"),
  179. ("-_1e0", "-_1e0"),
  180. ("_1", "_1"),
  181. ("_1,", "_1,"),
  182. ("_1,_", "_1,_"),
  183. ("_1e0", "_1e0"),
  184. ("1,2e_1", "1,2e_1"),
  185. ("1,2e1_0", "1,2e1_0"),
  186. ("1,_2", "1,_2"),
  187. (",1__2", ",1__2"),
  188. (",1e", ",1e"),
  189. ("-,1e", "-,1e"),
  190. ("1_000,000_000", "1_000,000_000"),
  191. ("1,e1_2", "1,e1_2"),
  192. ("e11,2", "e11,2"),
  193. ("1e11,2", "1e11,2"),
  194. ("1,2,2", "1,2,2"),
  195. ("1,2_1", "1,2_1"),
  196. ("1,2e-10e1", "1,2e-10e1"),
  197. ("--1,2", "--1,2"),
  198. ("1a_2,1", "1a_2,1"),
  199. ("1,2E-1", 0.12),
  200. ("1,2E1", 12.0),
  201. ]
  202. )
  203. def numeric_decimal(request):
  204. """
  205. Fixture for all numeric formats which should get recognized. The first entry
  206. represents the value to read while the second represents the expected result.
  207. """
  208. return request.param
  209. @pytest.fixture
  210. def pyarrow_xfail(request):
  211. """
  212. Fixture that xfails a test if the engine is pyarrow.
  213. """
  214. if "all_parsers" in request.fixturenames:
  215. parser = request.getfixturevalue("all_parsers")
  216. elif "all_parsers_all_precisions" in request.fixturenames:
  217. # Return value is tuple of (engine, precision)
  218. parser = request.getfixturevalue("all_parsers_all_precisions")[0]
  219. else:
  220. return
  221. if parser.engine == "pyarrow":
  222. mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
  223. request.node.add_marker(mark)
  224. @pytest.fixture
  225. def pyarrow_skip(request):
  226. """
  227. Fixture that skips a test if the engine is pyarrow.
  228. """
  229. if "all_parsers" in request.fixturenames:
  230. parser = request.getfixturevalue("all_parsers")
  231. elif "all_parsers_all_precisions" in request.fixturenames:
  232. # Return value is tuple of (engine, precision)
  233. parser = request.getfixturevalue("all_parsers_all_precisions")[0]
  234. else:
  235. return
  236. if parser.engine == "pyarrow":
  237. pytest.skip("pyarrow doesn't support this.")