spss.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. from __future__ import annotations
  2. from pathlib import Path
  3. from typing import (
  4. TYPE_CHECKING,
  5. Sequence,
  6. )
  7. from pandas._libs import lib
  8. from pandas.compat._optional import import_optional_dependency
  9. from pandas.util._validators import check_dtype_backend
  10. from pandas.core.dtypes.inference import is_list_like
  11. from pandas.io.common import stringify_path
  12. if TYPE_CHECKING:
  13. from pandas._typing import DtypeBackend
  14. from pandas import DataFrame
  15. def read_spss(
  16. path: str | Path,
  17. usecols: Sequence[str] | None = None,
  18. convert_categoricals: bool = True,
  19. dtype_backend: DtypeBackend | lib.NoDefault = lib.no_default,
  20. ) -> DataFrame:
  21. """
  22. Load an SPSS file from the file path, returning a DataFrame.
  23. Parameters
  24. ----------
  25. path : str or Path
  26. File path.
  27. usecols : list-like, optional
  28. Return a subset of the columns. If None, return all columns.
  29. convert_categoricals : bool, default is True
  30. Convert categorical columns into pd.Categorical.
  31. dtype_backend : {"numpy_nullable", "pyarrow"}, defaults to NumPy backed DataFrames
  32. Which dtype_backend to use, e.g. whether a DataFrame should have NumPy
  33. arrays, nullable dtypes are used for all dtypes that have a nullable
  34. implementation when "numpy_nullable" is set, pyarrow is used for all
  35. dtypes if "pyarrow" is set.
  36. The dtype_backends are still experimential.
  37. .. versionadded:: 2.0
  38. Returns
  39. -------
  40. DataFrame
  41. """
  42. pyreadstat = import_optional_dependency("pyreadstat")
  43. check_dtype_backend(dtype_backend)
  44. if usecols is not None:
  45. if not is_list_like(usecols):
  46. raise TypeError("usecols must be list-like.")
  47. usecols = list(usecols) # pyreadstat requires a list
  48. df, _ = pyreadstat.read_sav(
  49. stringify_path(path), usecols=usecols, apply_value_formats=convert_categoricals
  50. )
  51. if dtype_backend is not lib.no_default:
  52. df = df.convert_dtypes(dtype_backend=dtype_backend)
  53. return df