api.py 2.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. """
This is a pseudo-public API for downstream libraries. We ask that downstream
authors:
1) Try to avoid using internals directly altogether, and failing that,
2) Use only functions exposed here (or in core.internals).
  6. """
  7. from __future__ import annotations
  8. import numpy as np
  9. from pandas._libs.internals import BlockPlacement
  10. from pandas._typing import Dtype
  11. from pandas.core.dtypes.common import (
  12. is_datetime64tz_dtype,
  13. is_period_dtype,
  14. pandas_dtype,
  15. )
  16. from pandas.core.arrays import DatetimeArray
  17. from pandas.core.construction import extract_array
  18. from pandas.core.internals.blocks import (
  19. Block,
  20. DatetimeTZBlock,
  21. ExtensionBlock,
  22. check_ndim,
  23. ensure_block_shape,
  24. extract_pandas_array,
  25. get_block_type,
  26. maybe_coerce_values,
  27. )
  28. def make_block(
  29. values, placement, klass=None, ndim=None, dtype: Dtype | None = None
  30. ) -> Block:
  31. """
  32. This is a pseudo-public analogue to blocks.new_block.
  33. We ask that downstream libraries use this rather than any fully-internal
  34. APIs, including but not limited to:
  35. - core.internals.blocks.make_block
  36. - Block.make_block
  37. - Block.make_block_same_class
  38. - Block.__init__
  39. """
  40. if dtype is not None:
  41. dtype = pandas_dtype(dtype)
  42. values, dtype = extract_pandas_array(values, dtype, ndim)
  43. if klass is ExtensionBlock and is_period_dtype(values.dtype):
  44. # GH-44681 changed PeriodArray to be stored in the 2D
  45. # NDArrayBackedExtensionBlock instead of ExtensionBlock
  46. # -> still allow ExtensionBlock to be passed in this case for back compat
  47. klass = None
  48. if klass is None:
  49. dtype = dtype or values.dtype
  50. klass = get_block_type(dtype)
  51. elif klass is DatetimeTZBlock and not is_datetime64tz_dtype(values.dtype):
  52. # pyarrow calls get here
  53. values = DatetimeArray._simple_new(values, dtype=dtype)
  54. if not isinstance(placement, BlockPlacement):
  55. placement = BlockPlacement(placement)
  56. ndim = maybe_infer_ndim(values, placement, ndim)
  57. if is_datetime64tz_dtype(values.dtype) or is_period_dtype(values.dtype):
  58. # GH#41168 ensure we can pass 1D dt64tz values
  59. # More generally, any EA dtype that isn't is_1d_only_ea_dtype
  60. values = extract_array(values, extract_numpy=True)
  61. values = ensure_block_shape(values, ndim)
  62. check_ndim(values, placement, ndim)
  63. values = maybe_coerce_values(values)
  64. return klass(values, ndim=ndim, placement=placement)
  65. def maybe_infer_ndim(values, placement: BlockPlacement, ndim: int | None) -> int:
  66. """
  67. If `ndim` is not provided, infer it from placement and values.
  68. """
  69. if ndim is None:
  70. # GH#38134 Block constructor now assumes ndim is not None
  71. if not isinstance(values.dtype, np.dtype):
  72. if len(placement) != 1:
  73. ndim = 1
  74. else:
  75. ndim = 2
  76. else:
  77. ndim = values.ndim
  78. return ndim