123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297 |
- from __future__ import annotations
- import os
- import pytest
- from pandas.compat._optional import VERSIONS
- from pandas import (
- read_csv,
- read_table,
- )
- import pandas._testing as tm
class BaseParser:
    """
    Thin wrapper around ``read_csv`` / ``read_table`` that pins the engine.

    Every call made through an instance has its ``engine`` and ``low_memory``
    keyword arguments forced to the class-level values, so subclasses only
    need to override those attributes.
    """

    # Value passed as ``engine=`` on every call.
    engine: str | None = None
    # Value passed as ``low_memory=`` on every call.
    low_memory = True
    # ``float_precision`` values that get paired with this parser when the
    # parser/precision combinations are generated.
    float_precision_choices: list[str | None] = []

    def update_kwargs(self, kwargs):
        """
        Return a new dict: ``kwargs`` plus this parser's engine settings.

        The mapping passed in is not modified. Caller-supplied ``engine`` and
        ``low_memory`` entries are overridden by the instance's values.
        """
        return {**kwargs, "engine": self.engine, "low_memory": self.low_memory}

    def read_csv(self, *args, **kwargs):
        """
        Call ``read_csv`` with this parser's engine settings applied.
        """
        return read_csv(*args, **self.update_kwargs(kwargs))

    def read_csv_check_warnings(
        self, warn_type: type[Warning], warn_msg: str, *args, **kwargs
    ):
        """
        Call ``read_csv`` and assert that it emits the expected warning.

        ``warn_type`` is the warning class that must be produced and
        ``warn_msg`` is the pattern its message must match. The parsed
        result is returned.
        """
        # The stacklevel has to be checked here rather than in the tests:
        # this is the frame that calls read_csv, so it is the one the
        # warning should point to.
        call_kwargs = self.update_kwargs(kwargs)
        with tm.assert_produces_warning(warn_type, match=warn_msg):
            result = read_csv(*args, **call_kwargs)
        return result

    def read_table(self, *args, **kwargs):
        """
        Call ``read_table`` with this parser's engine settings applied.
        """
        return read_table(*args, **self.update_kwargs(kwargs))

    def read_table_check_warnings(
        self, warn_type: type[Warning], warn_msg: str, *args, **kwargs
    ):
        """
        Call ``read_table`` and assert that it emits the expected warning.

        ``warn_type`` is the warning class that must be produced and
        ``warn_msg`` is the pattern its message must match. The parsed
        result is returned.
        """
        # The stacklevel has to be checked here rather than in the tests:
        # this is the frame that calls read_table, so it is the one the
        # warning should point to.
        call_kwargs = self.update_kwargs(kwargs)
        with tm.assert_produces_warning(warn_type, match=warn_msg):
            result = read_table(*args, **call_kwargs)
        return result
class CParser(BaseParser):
    """
    Parser wrapper that sends every call through the "c" engine.
    """

    engine = "c"
    # The float_precision settings paired with this engine.
    float_precision_choices = [
        None,
        "high",
        "round_trip",
    ]
class CParserHighMemory(CParser):
    """
    C-engine parser wrapper that passes ``low_memory=False``.
    """

    low_memory = False
class CParserLowMemory(CParser):
    """
    C-engine parser wrapper that passes ``low_memory=True``.
    """

    low_memory = True
class PythonParser(BaseParser):
    """
    Parser wrapper that sends every call through the "python" engine.
    """

    engine = "python"
    # Only the default float precision is paired with this engine.
    float_precision_choices = [None]
class PyArrowParser(BaseParser):
    """
    Parser wrapper that sends every call through the "pyarrow" engine.
    """

    engine = "pyarrow"
    # Only the default float precision is paired with this engine.
    float_precision_choices = [None]
@pytest.fixture
def csv_dir_path(datapath):
    """
    Directory holding the data files needed for parser tests.
    """
    data_dir = datapath("io", "parser", "data")
    return data_dir
@pytest.fixture
def csv1(datapath):
    """
    Path to the "test1.csv" data file needed for parser tests.
    """
    csv_dir = datapath("io", "data", "csv")
    return os.path.join(csv_dir, "test1.csv")
# Aliases for the parser classes used to build the parametrization lists.
_cParserHighMemory = CParserHighMemory
_cParserLowMemory = CParserLowMemory
_pythonParser = PythonParser
_pyarrowParser = PyArrowParser

# Per-engine parameter lists, each followed by its index-aligned test ids.
_py_parsers_only = [_pythonParser]
_py_parser_ids = ["python"]

_c_parsers_only = [_cParserHighMemory, _cParserLowMemory]
_c_parser_ids = ["c_high", "c_low"]

# The pyarrow entry is wrapped in pytest.param so it carries the single_cpu mark.
_pyarrow_parsers_only = [pytest.param(_pyarrowParser, marks=pytest.mark.single_cpu)]
_pyarrow_parsers_ids = ["pyarrow"]

# Combined lists; parameters and ids are concatenated in the same order.
_all_parsers = _c_parsers_only + _py_parsers_only + _pyarrow_parsers_only
_all_parser_ids = _c_parser_ids + _py_parser_ids + _pyarrow_parsers_ids
@pytest.fixture(params=_all_parsers, ids=_all_parser_ids)
def all_parsers(request):
    """
    Fixture providing an instance of each of the CSV parsers.

    For the pyarrow engine the test is skipped unless pyarrow can be
    imported at the minimum version listed in ``VERSIONS``, and pyarrow's
    CPU count is set to 1.
    """
    parser = request.param()
    if parser.engine != "pyarrow":
        return parser

    # importorskip hands back the imported module, so it is used directly.
    pa = pytest.importorskip("pyarrow", VERSIONS["pyarrow"])
    # Try finding a way to disable threads all together
    # for more stable CI runs
    pa.set_cpu_count(1)
    return parser
@pytest.fixture(params=_c_parsers_only, ids=_c_parser_ids)
def c_parser_only(request):
    """
    Fixture providing an instance of each CSV parser that uses the C engine.
    """
    parser_cls = request.param
    return parser_cls()
@pytest.fixture(params=_py_parsers_only, ids=_py_parser_ids)
def python_parser_only(request):
    """
    Fixture providing an instance of each CSV parser that uses the Python engine.
    """
    parser_cls = request.param
    return parser_cls()
@pytest.fixture(params=_pyarrow_parsers_only, ids=_pyarrow_parsers_ids)
def pyarrow_parser_only(request):
    """
    Fixture providing an instance of each CSV parser that uses the Pyarrow engine.
    """
    parser_cls = request.param
    return parser_cls()
def _get_all_parser_float_precision_combinations():
    """
    Build every allowable parser / float precision pairing with its test id.

    Returns a dict with two index-aligned lists: "params" holds
    ``pytest.param`` objects wrapping ``(parser_instance, precision)``
    tuples, and "ids" holds the matching ``"<parser id>-<precision>"``
    strings.
    """
    combos = []
    combo_ids = []
    for entry, entry_id in zip(_all_parsers, _all_parser_ids):
        # Entries wrapped in pytest.param expose the actual parser via .values
        parser_cls = entry.values[0] if hasattr(entry, "values") else entry

        # Re-wrap in pytest.param for pyarrow
        if parser_cls.engine == "pyarrow":
            marks = pytest.mark.single_cpu
        else:
            marks = ()

        for precision in parser_cls.float_precision_choices:
            combos.append(pytest.param((parser_cls(), precision), marks=marks))
            combo_ids.append(f"{entry_id}-{precision}")

    return {"params": combos, "ids": combo_ids}
# The helper returns a dict whose keys ("params", "ids") are exactly the
# keyword arguments pytest.fixture expects, so it is called once and unpacked.
@pytest.fixture(**_get_all_parser_float_precision_combinations())
def all_parsers_all_precisions(request):
    """
    Fixture for all allowable combinations of parser and float precision.

    Each value is a ``(parser, precision)`` tuple. Combinations that use the
    pyarrow engine are skipped unless pyarrow can be imported at the minimum
    version listed in ``VERSIONS``, matching what the ``all_parsers`` fixture
    does for that engine.
    """
    combination = request.param
    if combination[0].engine == "pyarrow":
        # Skip instead of letting the test fail when pyarrow is unavailable.
        pytest.importorskip("pyarrow", VERSIONS["pyarrow"])
    return combination
# Building blocks for the UTF encoding fixtures.
_utf_values = [8, 16, 32]
_encoding_seps = ["", "-", "_"]
_encoding_prefixes = ["utf", "UTF"]

# One format string per separator/prefix pair (separators vary slowest), each
# ending in a literal "{0}" placeholder.
_encoding_fmts = [
    prefix + sep + "{0}" for sep in _encoding_seps for prefix in _encoding_prefixes
]
@pytest.fixture(params=_utf_values)
def utf_value(request):
    """
    Fixture providing each possible integer value for a UTF encoding.
    """
    value = request.param
    return value
@pytest.fixture(params=_encoding_fmts)
def encoding_fmt(request):
    """
    Fixture providing each possible string format of a UTF encoding.
    """
    fmt = request.param
    return fmt
@pytest.fixture(
    params=[
        # positive cases; expected to parse as float
        ("-1,0", -1.0),
        ("-1,2e0", -1.2),
        ("-1e0", -1.0),
        ("+1e0", 1.0),
        ("+1e+0", 1.0),
        ("+1e-1", 0.1),
        ("+,1e1", 1.0),
        ("+1,e0", 1.0),
        ("-,1e1", -1.0),
        ("-1,e0", -1.0),
        ("0,1", 0.1),
        ("1,", 1.0),
        (",1", 0.1),
        ("-,1", -0.1),
        ("1_,", 1.0),
        ("1_234,56", 1234.56),
        ("1_234,56e0", 1234.56),
        # negative cases; must not parse as float
        ("_", "_"),
        ("-_", "-_"),
        ("-_1", "-_1"),
        ("-_1e0", "-_1e0"),
        ("_1", "_1"),
        ("_1,", "_1,"),
        ("_1,_", "_1,_"),
        ("_1e0", "_1e0"),
        ("1,2e_1", "1,2e_1"),
        ("1,2e1_0", "1,2e1_0"),
        ("1,_2", "1,_2"),
        (",1__2", ",1__2"),
        (",1e", ",1e"),
        ("-,1e", "-,1e"),
        ("1_000,000_000", "1_000,000_000"),
        ("1,e1_2", "1,e1_2"),
        ("e11,2", "e11,2"),
        ("1e11,2", "1e11,2"),
        ("1,2,2", "1,2,2"),
        ("1,2_1", "1,2_1"),
        ("1,2e-10e1", "1,2e-10e1"),
        ("--1,2", "--1,2"),
        ("1a_2,1", "1a_2,1"),
        # positive cases again: upper-case exponent marker, expected as float
        ("1,2E-1", 0.12),
        ("1,2E1", 12.0),
    ]
)
def numeric_decimal(request):
    """
    Fixture of numeric-looking strings paired with their expected parse result.

    The first entry of each pair is the value to read and the second is the
    expected result: a float when the text should be recognized as a number,
    or the unchanged string when it must not be parsed as a float. The inputs
    use "," where the expected floats have a decimal point and "_" as a
    digit-group separator.
    """
    pair = request.param
    return pair
@pytest.fixture
def pyarrow_xfail(request):
    """
    Fixture that xfails a test if the engine is pyarrow.

    The parser is taken from ``all_parsers`` if the test requests it,
    otherwise from ``all_parsers_all_precisions``; a test requesting neither
    is left untouched.
    """
    requested = request.fixturenames
    if "all_parsers" in requested:
        engine = request.getfixturevalue("all_parsers").engine
    elif "all_parsers_all_precisions" in requested:
        # The fixture value is a (parser, precision) tuple
        engine = request.getfixturevalue("all_parsers_all_precisions")[0].engine
    else:
        return

    if engine != "pyarrow":
        return
    xfail_mark = pytest.mark.xfail(reason="pyarrow doesn't support this.")
    request.node.add_marker(xfail_mark)
@pytest.fixture
def pyarrow_skip(request):
    """
    Fixture that skips a test if the engine is pyarrow.

    The parser is taken from ``all_parsers`` if the test requests it,
    otherwise from ``all_parsers_all_precisions``; a test requesting neither
    is left untouched.
    """
    requested = request.fixturenames
    if "all_parsers" in requested:
        engine = request.getfixturevalue("all_parsers").engine
    elif "all_parsers_all_precisions" in requested:
        # The fixture value is a (parser, precision) tuple
        engine = request.getfixturevalue("all_parsers_all_precisions")[0].engine
    else:
        return

    if engine != "pyarrow":
        return
    pytest.skip("pyarrow doesn't support this.")
|