"""
Tests the usecols functionality during parsing
for all of the parsers defined in parsers.py
"""
from io import StringIO

import pytest

from pandas import DataFrame
import pandas._testing as tm

  9. _msg_validate_usecols_arg = (
  10. "'usecols' must either be list-like "
  11. "of all strings, all unicode, all "
  12. "integers or a callable."
  13. )
  14. _msg_validate_usecols_names = (
  15. "Usecols do not match columns, columns expected but not found: {0}"
  16. )
  17. def test_usecols_with_unicode_strings(all_parsers):
  18. # see gh-13219
  19. data = """AAA,BBB,CCC,DDD
  20. 0.056674973,8,True,a
  21. 2.613230982,2,False,b
  22. 3.568935038,7,False,a"""
  23. parser = all_parsers
  24. exp_data = {
  25. "AAA": {
  26. 0: 0.056674972999999997,
  27. 1: 2.6132309819999997,
  28. 2: 3.5689350380000002,
  29. },
  30. "BBB": {0: 8, 1: 2, 2: 7},
  31. }
  32. expected = DataFrame(exp_data)
  33. result = parser.read_csv(StringIO(data), usecols=["AAA", "BBB"])
  34. tm.assert_frame_equal(result, expected)
  35. def test_usecols_with_single_byte_unicode_strings(all_parsers):
  36. # see gh-13219
  37. data = """A,B,C,D
  38. 0.056674973,8,True,a
  39. 2.613230982,2,False,b
  40. 3.568935038,7,False,a"""
  41. parser = all_parsers
  42. exp_data = {
  43. "A": {
  44. 0: 0.056674972999999997,
  45. 1: 2.6132309819999997,
  46. 2: 3.5689350380000002,
  47. },
  48. "B": {0: 8, 1: 2, 2: 7},
  49. }
  50. expected = DataFrame(exp_data)
  51. result = parser.read_csv(StringIO(data), usecols=["A", "B"])
  52. tm.assert_frame_equal(result, expected)
  53. @pytest.mark.parametrize("usecols", [["AAA", b"BBB"], [b"AAA", "BBB"]])
  54. def test_usecols_with_mixed_encoding_strings(all_parsers, usecols):
  55. data = """AAA,BBB,CCC,DDD
  56. 0.056674973,8,True,a
  57. 2.613230982,2,False,b
  58. 3.568935038,7,False,a"""
  59. parser = all_parsers
  60. with pytest.raises(ValueError, match=_msg_validate_usecols_arg):
  61. parser.read_csv(StringIO(data), usecols=usecols)
  62. @pytest.mark.parametrize("usecols", [["あああ", "いい"], ["あああ", "いい"]])
  63. def test_usecols_with_multi_byte_characters(all_parsers, usecols):
  64. data = """あああ,いい,ううう,ええええ
  65. 0.056674973,8,True,a
  66. 2.613230982,2,False,b
  67. 3.568935038,7,False,a"""
  68. parser = all_parsers
  69. exp_data = {
  70. "あああ": {
  71. 0: 0.056674972999999997,
  72. 1: 2.6132309819999997,
  73. 2: 3.5689350380000002,
  74. },
  75. "いい": {0: 8, 1: 2, 2: 7},
  76. }
  77. expected = DataFrame(exp_data)
  78. result = parser.read_csv(StringIO(data), usecols=usecols)
  79. tm.assert_frame_equal(result, expected)