# test_insert.py (3.7 KB)
"""
test_insert is specifically for the DataFrame.insert method; not to be
confused with tests with "insert" in their names that are really testing
__setitem__.
"""
import numpy as np
import pytest

from pandas.errors import PerformanceWarning

from pandas import (
    DataFrame,
    Index,
)
import pandas._testing as tm
  14. class TestDataFrameInsert:
  15. def test_insert(self):
  16. df = DataFrame(
  17. np.random.randn(5, 3), index=np.arange(5), columns=["c", "b", "a"]
  18. )
  19. df.insert(0, "foo", df["a"])
  20. tm.assert_index_equal(df.columns, Index(["foo", "c", "b", "a"]))
  21. tm.assert_series_equal(df["a"], df["foo"], check_names=False)
  22. df.insert(2, "bar", df["c"])
  23. tm.assert_index_equal(df.columns, Index(["foo", "c", "bar", "b", "a"]))
  24. tm.assert_almost_equal(df["c"], df["bar"], check_names=False)
  25. with pytest.raises(ValueError, match="already exists"):
  26. df.insert(1, "a", df["b"])
  27. msg = "cannot insert c, already exists"
  28. with pytest.raises(ValueError, match=msg):
  29. df.insert(1, "c", df["b"])
  30. df.columns.name = "some_name"
  31. # preserve columns name field
  32. df.insert(0, "baz", df["c"])
  33. assert df.columns.name == "some_name"
  34. def test_insert_column_bug_4032(self):
  35. # GH#4032, inserting a column and renaming causing errors
  36. df = DataFrame({"b": [1.1, 2.2]})
  37. df = df.rename(columns={})
  38. df.insert(0, "a", [1, 2])
  39. result = df.rename(columns={})
  40. str(result)
  41. expected = DataFrame([[1, 1.1], [2, 2.2]], columns=["a", "b"])
  42. tm.assert_frame_equal(result, expected)
  43. df.insert(0, "c", [1.3, 2.3])
  44. result = df.rename(columns={})
  45. str(result)
  46. expected = DataFrame([[1.3, 1, 1.1], [2.3, 2, 2.2]], columns=["c", "a", "b"])
  47. tm.assert_frame_equal(result, expected)
  48. def test_insert_with_columns_dups(self):
  49. # GH#14291
  50. df = DataFrame()
  51. df.insert(0, "A", ["g", "h", "i"], allow_duplicates=True)
  52. df.insert(0, "A", ["d", "e", "f"], allow_duplicates=True)
  53. df.insert(0, "A", ["a", "b", "c"], allow_duplicates=True)
  54. exp = DataFrame(
  55. [["a", "d", "g"], ["b", "e", "h"], ["c", "f", "i"]], columns=["A", "A", "A"]
  56. )
  57. tm.assert_frame_equal(df, exp)
  58. def test_insert_item_cache(self, using_array_manager, using_copy_on_write):
  59. df = DataFrame(np.random.randn(4, 3))
  60. ser = df[0]
  61. if using_array_manager:
  62. expected_warning = None
  63. else:
  64. # with BlockManager warn about high fragmentation of single dtype
  65. expected_warning = PerformanceWarning
  66. with tm.assert_produces_warning(expected_warning):
  67. for n in range(100):
  68. df[n + 3] = df[1] * n
  69. if using_copy_on_write:
  70. ser.iloc[0] = 99
  71. assert df.iloc[0, 0] == df[0][0]
  72. assert df.iloc[0, 0] != 99
  73. else:
  74. ser.values[0] = 99
  75. assert df.iloc[0, 0] == df[0][0]
  76. assert df.iloc[0, 0] == 99
  77. def test_insert_EA_no_warning(self):
  78. # PerformanceWarning about fragmented frame should not be raised when
  79. # using EAs (https://github.com/pandas-dev/pandas/issues/44098)
  80. df = DataFrame(np.random.randint(0, 100, size=(3, 100)), dtype="Int64")
  81. with tm.assert_produces_warning(None):
  82. df["a"] = np.array([1, 2, 3])
  83. def test_insert_frame(self):
  84. # GH#42403
  85. df = DataFrame({"col1": [1, 2], "col2": [3, 4]})
  86. msg = r"Expected a 1D array, got an array with shape \(2, 2\)"
  87. with pytest.raises(ValueError, match=msg):
  88. df.insert(1, "newcol", df)