# docscrape.py
"""Extract reference documentation from the NumPy source tree.

Copyright (C) 2008 Stefan van der Walt <stefan@mentat.za.net>, Pauli Virtanen <pav@iki.fi>

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in
    the documentation and/or other materials provided with the
    distribution.

THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

"""
import copy
import inspect
import pydoc
import re
import sys
import textwrap
from collections import namedtuple
from collections.abc import Callable, Mapping
from warnings import warn
  33. def strip_blank_lines(l):
  34. "Remove leading and trailing blank lines from a list of lines"
  35. while l and not l[0].strip():
  36. del l[0]
  37. while l and not l[-1].strip():
  38. del l[-1]
  39. return l
  40. class Reader:
  41. """A line-based string reader.
  42. """
  43. def __init__(self, data):
  44. """
  45. Parameters
  46. ----------
  47. data : str
  48. String with lines separated by '\n'.
  49. """
  50. if isinstance(data, list):
  51. self._str = data
  52. else:
  53. self._str = data.split('\n') # store string as list of lines
  54. self.reset()
  55. def __getitem__(self, n):
  56. return self._str[n]
  57. def reset(self):
  58. self._l = 0 # current line nr
  59. def read(self):
  60. if not self.eof():
  61. out = self[self._l]
  62. self._l += 1
  63. return out
  64. else:
  65. return ''
  66. def seek_next_non_empty_line(self):
  67. for l in self[self._l:]:
  68. if l.strip():
  69. break
  70. else:
  71. self._l += 1
  72. def eof(self):
  73. return self._l >= len(self._str)
  74. def read_to_condition(self, condition_func):
  75. start = self._l
  76. for line in self[start:]:
  77. if condition_func(line):
  78. return self[start:self._l]
  79. self._l += 1
  80. if self.eof():
  81. return self[start:self._l+1]
  82. return []
  83. def read_to_next_empty_line(self):
  84. self.seek_next_non_empty_line()
  85. def is_empty(line):
  86. return not line.strip()
  87. return self.read_to_condition(is_empty)
  88. def read_to_next_unindented_line(self):
  89. def is_unindented(line):
  90. return (line.strip() and (len(line.lstrip()) == len(line)))
  91. return self.read_to_condition(is_unindented)
  92. def peek(self, n=0):
  93. if self._l + n < len(self._str):
  94. return self[self._l + n]
  95. else:
  96. return ''
  97. def is_empty(self):
  98. return not ''.join(self._str).strip()
  99. class ParseError(Exception):
  100. def __str__(self):
  101. message = self.args[0]
  102. if hasattr(self, 'docstring'):
  103. message = f"{message} in {self.docstring!r}"
  104. return message
  105. Parameter = namedtuple('Parameter', ['name', 'type', 'desc'])
  106. class NumpyDocString(Mapping):
  107. """Parses a numpydoc string to an abstract representation
  108. Instances define a mapping from section title to structured data.
  109. """
  110. sections = {
  111. 'Signature': '',
  112. 'Summary': [''],
  113. 'Extended Summary': [],
  114. 'Parameters': [],
  115. 'Returns': [],
  116. 'Yields': [],
  117. 'Receives': [],
  118. 'Raises': [],
  119. 'Warns': [],
  120. 'Other Parameters': [],
  121. 'Attributes': [],
  122. 'Methods': [],
  123. 'See Also': [],
  124. 'Notes': [],
  125. 'Warnings': [],
  126. 'References': '',
  127. 'Examples': '',
  128. 'index': {}
  129. }
  130. def __init__(self, docstring, config={}):
  131. orig_docstring = docstring
  132. docstring = textwrap.dedent(docstring).split('\n')
  133. self._doc = Reader(docstring)
  134. self._parsed_data = copy.deepcopy(self.sections)
  135. try:
  136. self._parse()
  137. except ParseError as e:
  138. e.docstring = orig_docstring
  139. raise
  140. def __getitem__(self, key):
  141. return self._parsed_data[key]
  142. def __setitem__(self, key, val):
  143. if key not in self._parsed_data:
  144. self._error_location(f"Unknown section {key}", error=False)
  145. else:
  146. self._parsed_data[key] = val
  147. def __iter__(self):
  148. return iter(self._parsed_data)
  149. def __len__(self):
  150. return len(self._parsed_data)
  151. def _is_at_section(self):
  152. self._doc.seek_next_non_empty_line()
  153. if self._doc.eof():
  154. return False
  155. l1 = self._doc.peek().strip() # e.g. Parameters
  156. if l1.startswith('.. index::'):
  157. return True
  158. l2 = self._doc.peek(1).strip() # ---------- or ==========
  159. return l2.startswith('-'*len(l1)) or l2.startswith('='*len(l1))
  160. def _strip(self, doc):
  161. i = 0
  162. j = 0
  163. for i, line in enumerate(doc):
  164. if line.strip():
  165. break
  166. for j, line in enumerate(doc[::-1]):
  167. if line.strip():
  168. break
  169. return doc[i:len(doc)-j]
  170. def _read_to_next_section(self):
  171. section = self._doc.read_to_next_empty_line()
  172. while not self._is_at_section() and not self._doc.eof():
  173. if not self._doc.peek(-1).strip(): # previous line was empty
  174. section += ['']
  175. section += self._doc.read_to_next_empty_line()
  176. return section
  177. def _read_sections(self):
  178. while not self._doc.eof():
  179. data = self._read_to_next_section()
  180. name = data[0].strip()
  181. if name.startswith('..'): # index section
  182. yield name, data[1:]
  183. elif len(data) < 2:
  184. yield StopIteration
  185. else:
  186. yield name, self._strip(data[2:])
  187. def _parse_param_list(self, content, single_element_is_type=False):
  188. r = Reader(content)
  189. params = []
  190. while not r.eof():
  191. header = r.read().strip()
  192. if ' : ' in header:
  193. arg_name, arg_type = header.split(' : ')[:2]
  194. else:
  195. if single_element_is_type:
  196. arg_name, arg_type = '', header
  197. else:
  198. arg_name, arg_type = header, ''
  199. desc = r.read_to_next_unindented_line()
  200. desc = dedent_lines(desc)
  201. desc = strip_blank_lines(desc)
  202. params.append(Parameter(arg_name, arg_type, desc))
  203. return params
  204. # See also supports the following formats.
  205. #
  206. # <FUNCNAME>
  207. # <FUNCNAME> SPACE* COLON SPACE+ <DESC> SPACE*
  208. # <FUNCNAME> ( COMMA SPACE+ <FUNCNAME>)+ (COMMA | PERIOD)? SPACE*
  209. # <FUNCNAME> ( COMMA SPACE+ <FUNCNAME>)* SPACE* COLON SPACE+ <DESC> SPACE*
  210. # <FUNCNAME> is one of
  211. # <PLAIN_FUNCNAME>
  212. # COLON <ROLE> COLON BACKTICK <PLAIN_FUNCNAME> BACKTICK
  213. # where
  214. # <PLAIN_FUNCNAME> is a legal function name, and
  215. # <ROLE> is any nonempty sequence of word characters.
  216. # Examples: func_f1 :meth:`func_h1` :obj:`~baz.obj_r` :class:`class_j`
  217. # <DESC> is a string describing the function.
  218. _role = r":(?P<role>\w+):"
  219. _funcbacktick = r"`(?P<name>(?:~\w+\.)?[a-zA-Z0-9_\.-]+)`"
  220. _funcplain = r"(?P<name2>[a-zA-Z0-9_\.-]+)"
  221. _funcname = r"(" + _role + _funcbacktick + r"|" + _funcplain + r")"
  222. _funcnamenext = _funcname.replace('role', 'rolenext')
  223. _funcnamenext = _funcnamenext.replace('name', 'namenext')
  224. _description = r"(?P<description>\s*:(\s+(?P<desc>\S+.*))?)?\s*$"
  225. _func_rgx = re.compile(r"^\s*" + _funcname + r"\s*")
  226. _line_rgx = re.compile(
  227. r"^\s*" +
  228. r"(?P<allfuncs>" + # group for all function names
  229. _funcname +
  230. r"(?P<morefuncs>([,]\s+" + _funcnamenext + r")*)" +
  231. r")" + # end of "allfuncs"
  232. r"(?P<trailing>[,\.])?" + # Some function lists have a trailing comma (or period) '\s*'
  233. _description)
  234. # Empty <DESC> elements are replaced with '..'
  235. empty_description = '..'
  236. def _parse_see_also(self, content):
  237. """
  238. func_name : Descriptive text
  239. continued text
  240. another_func_name : Descriptive text
  241. func_name1, func_name2, :meth:`func_name`, func_name3
  242. """
  243. items = []
  244. def parse_item_name(text):
  245. """Match ':role:`name`' or 'name'."""
  246. m = self._func_rgx.match(text)
  247. if not m:
  248. raise ParseError(f"{text} is not a item name")
  249. role = m.group('role')
  250. name = m.group('name') if role else m.group('name2')
  251. return name, role, m.end()
  252. rest = []
  253. for line in content:
  254. if not line.strip():
  255. continue
  256. line_match = self._line_rgx.match(line)
  257. description = None
  258. if line_match:
  259. description = line_match.group('desc')
  260. if line_match.group('trailing') and description:
  261. self._error_location(
  262. 'Unexpected comma or period after function list at index %d of '
  263. 'line "%s"' % (line_match.end('trailing'), line),
  264. error=False)
  265. if not description and line.startswith(' '):
  266. rest.append(line.strip())
  267. elif line_match:
  268. funcs = []
  269. text = line_match.group('allfuncs')
  270. while True:
  271. if not text.strip():
  272. break
  273. name, role, match_end = parse_item_name(text)
  274. funcs.append((name, role))
  275. text = text[match_end:].strip()
  276. if text and text[0] == ',':
  277. text = text[1:].strip()
  278. rest = list(filter(None, [description]))
  279. items.append((funcs, rest))
  280. else:
  281. raise ParseError(f"{line} is not a item name")
  282. return items
  283. def _parse_index(self, section, content):
  284. """
  285. .. index: default
  286. :refguide: something, else, and more
  287. """
  288. def strip_each_in(lst):
  289. return [s.strip() for s in lst]
  290. out = {}
  291. section = section.split('::')
  292. if len(section) > 1:
  293. out['default'] = strip_each_in(section[1].split(','))[0]
  294. for line in content:
  295. line = line.split(':')
  296. if len(line) > 2:
  297. out[line[1]] = strip_each_in(line[2].split(','))
  298. return out
  299. def _parse_summary(self):
  300. """Grab signature (if given) and summary"""
  301. if self._is_at_section():
  302. return
  303. # If several signatures present, take the last one
  304. while True:
  305. summary = self._doc.read_to_next_empty_line()
  306. summary_str = " ".join([s.strip() for s in summary]).strip()
  307. compiled = re.compile(r'^([\w., ]+=)?\s*[\w\.]+\(.*\)$')
  308. if compiled.match(summary_str):
  309. self['Signature'] = summary_str
  310. if not self._is_at_section():
  311. continue
  312. break
  313. if summary is not None:
  314. self['Summary'] = summary
  315. if not self._is_at_section():
  316. self['Extended Summary'] = self._read_to_next_section()
  317. def _parse(self):
  318. self._doc.reset()
  319. self._parse_summary()
  320. sections = list(self._read_sections())
  321. section_names = {section for section, content in sections}
  322. has_returns = 'Returns' in section_names
  323. has_yields = 'Yields' in section_names
  324. # We could do more tests, but we are not. Arbitrarily.
  325. if has_returns and has_yields:
  326. msg = 'Docstring contains both a Returns and Yields section.'
  327. raise ValueError(msg)
  328. if not has_yields and 'Receives' in section_names:
  329. msg = 'Docstring contains a Receives section but not Yields.'
  330. raise ValueError(msg)
  331. for (section, content) in sections:
  332. if not section.startswith('..'):
  333. section = (s.capitalize() for s in section.split(' '))
  334. section = ' '.join(section)
  335. if self.get(section):
  336. self._error_location(f"The section {section} appears twice")
  337. if section in ('Parameters', 'Other Parameters', 'Attributes',
  338. 'Methods'):
  339. self[section] = self._parse_param_list(content)
  340. elif section in ('Returns', 'Yields', 'Raises', 'Warns', 'Receives'):
  341. self[section] = self._parse_param_list(
  342. content, single_element_is_type=True)
  343. elif section.startswith('.. index::'):
  344. self['index'] = self._parse_index(section, content)
  345. elif section == 'See Also':
  346. self['See Also'] = self._parse_see_also(content)
  347. else:
  348. self[section] = content
  349. def _error_location(self, msg, error=True):
  350. if hasattr(self, '_obj'):
  351. # we know where the docs came from:
  352. try:
  353. filename = inspect.getsourcefile(self._obj)
  354. except TypeError:
  355. filename = None
  356. msg = msg + f" in the docstring of {self._obj} in {filename}."
  357. if error:
  358. raise ValueError(msg)
  359. else:
  360. warn(msg)
  361. # string conversion routines
  362. def _str_header(self, name, symbol='-'):
  363. return [name, len(name)*symbol]
  364. def _str_indent(self, doc, indent=4):
  365. out = []
  366. for line in doc:
  367. out += [' '*indent + line]
  368. return out
  369. def _str_signature(self):
  370. if self['Signature']:
  371. return [self['Signature'].replace('*', r'\*')] + ['']
  372. else:
  373. return ['']
  374. def _str_summary(self):
  375. if self['Summary']:
  376. return self['Summary'] + ['']
  377. else:
  378. return []
  379. def _str_extended_summary(self):
  380. if self['Extended Summary']:
  381. return self['Extended Summary'] + ['']
  382. else:
  383. return []
  384. def _str_param_list(self, name):
  385. out = []
  386. if self[name]:
  387. out += self._str_header(name)
  388. for param in self[name]:
  389. parts = []
  390. if param.name:
  391. parts.append(param.name)
  392. if param.type:
  393. parts.append(param.type)
  394. out += [' : '.join(parts)]
  395. if param.desc and ''.join(param.desc).strip():
  396. out += self._str_indent(param.desc)
  397. out += ['']
  398. return out
  399. def _str_section(self, name):
  400. out = []
  401. if self[name]:
  402. out += self._str_header(name)
  403. out += self[name]
  404. out += ['']
  405. return out
  406. def _str_see_also(self, func_role):
  407. if not self['See Also']:
  408. return []
  409. out = []
  410. out += self._str_header("See Also")
  411. out += ['']
  412. last_had_desc = True
  413. for funcs, desc in self['See Also']:
  414. assert isinstance(funcs, list)
  415. links = []
  416. for func, role in funcs:
  417. if role:
  418. link = f':{role}:`{func}`'
  419. elif func_role:
  420. link = f':{func_role}:`{func}`'
  421. else:
  422. link = f"`{func}`_"
  423. links.append(link)
  424. link = ', '.join(links)
  425. out += [link]
  426. if desc:
  427. out += self._str_indent([' '.join(desc)])
  428. last_had_desc = True
  429. else:
  430. last_had_desc = False
  431. out += self._str_indent([self.empty_description])
  432. if last_had_desc:
  433. out += ['']
  434. out += ['']
  435. return out
  436. def _str_index(self):
  437. idx = self['index']
  438. out = []
  439. output_index = False
  440. default_index = idx.get('default', '')
  441. if default_index:
  442. output_index = True
  443. out += [f'.. index:: {default_index}']
  444. for section, references in idx.items():
  445. if section == 'default':
  446. continue
  447. output_index = True
  448. out += [f" :{section}: {', '.join(references)}"]
  449. if output_index:
  450. return out
  451. else:
  452. return ''
  453. def __str__(self, func_role=''):
  454. out = []
  455. out += self._str_signature()
  456. out += self._str_summary()
  457. out += self._str_extended_summary()
  458. for param_list in ('Parameters', 'Returns', 'Yields', 'Receives',
  459. 'Other Parameters', 'Raises', 'Warns'):
  460. out += self._str_param_list(param_list)
  461. out += self._str_section('Warnings')
  462. out += self._str_see_also(func_role)
  463. for s in ('Notes', 'References', 'Examples'):
  464. out += self._str_section(s)
  465. for param_list in ('Attributes', 'Methods'):
  466. out += self._str_param_list(param_list)
  467. out += self._str_index()
  468. return '\n'.join(out)
  469. def indent(str, indent=4):
  470. indent_str = ' '*indent
  471. if str is None:
  472. return indent_str
  473. lines = str.split('\n')
  474. return '\n'.join(indent_str + l for l in lines)
  475. def dedent_lines(lines):
  476. """Deindent a list of lines maximally"""
  477. return textwrap.dedent("\n".join(lines)).split("\n")
  478. def header(text, style='-'):
  479. return text + '\n' + style*len(text) + '\n'
  480. class FunctionDoc(NumpyDocString):
  481. def __init__(self, func, role='func', doc=None, config={}):
  482. self._f = func
  483. self._role = role # e.g. "func" or "meth"
  484. if doc is None:
  485. if func is None:
  486. raise ValueError("No function or docstring given")
  487. doc = inspect.getdoc(func) or ''
  488. NumpyDocString.__init__(self, doc, config)
  489. if not self['Signature'] and func is not None:
  490. func, func_name = self.get_func()
  491. try:
  492. try:
  493. signature = str(inspect.signature(func))
  494. except (AttributeError, ValueError):
  495. # try to read signature, backward compat for older Python
  496. if sys.version_info[0] >= 3:
  497. argspec = inspect.getfullargspec(func)
  498. else:
  499. argspec = inspect.getargspec(func)
  500. signature = inspect.formatargspec(*argspec)
  501. signature = f'{func_name}{signature}'
  502. except TypeError:
  503. signature = f'{func_name}()'
  504. self['Signature'] = signature
  505. def get_func(self):
  506. func_name = getattr(self._f, '__name__', self.__class__.__name__)
  507. if inspect.isclass(self._f):
  508. func = getattr(self._f, '__call__', self._f.__init__)
  509. else:
  510. func = self._f
  511. return func, func_name
  512. def __str__(self):
  513. out = ''
  514. func, func_name = self.get_func()
  515. roles = {'func': 'function',
  516. 'meth': 'method'}
  517. if self._role:
  518. if self._role not in roles:
  519. print(f"Warning: invalid role {self._role}")
  520. out += f".. {roles.get(self._role, '')}:: {func_name}\n \n\n"
  521. out += super().__str__(func_role=self._role)
  522. return out
  523. class ClassDoc(NumpyDocString):
  524. extra_public_methods = ['__call__']
  525. def __init__(self, cls, doc=None, modulename='', func_doc=FunctionDoc,
  526. config={}):
  527. if not inspect.isclass(cls) and cls is not None:
  528. raise ValueError(f"Expected a class or None, but got {cls!r}")
  529. self._cls = cls
  530. if 'sphinx' in sys.modules:
  531. from sphinx.ext.autodoc import ALL
  532. else:
  533. ALL = object()
  534. self.show_inherited_members = config.get(
  535. 'show_inherited_class_members', True)
  536. if modulename and not modulename.endswith('.'):
  537. modulename += '.'
  538. self._mod = modulename
  539. if doc is None:
  540. if cls is None:
  541. raise ValueError("No class or documentation string given")
  542. doc = pydoc.getdoc(cls)
  543. NumpyDocString.__init__(self, doc)
  544. _members = config.get('members', [])
  545. if _members is ALL:
  546. _members = None
  547. _exclude = config.get('exclude-members', [])
  548. if config.get('show_class_members', True) and _exclude is not ALL:
  549. def splitlines_x(s):
  550. if not s:
  551. return []
  552. else:
  553. return s.splitlines()
  554. for field, items in [('Methods', self.methods),
  555. ('Attributes', self.properties)]:
  556. if not self[field]:
  557. doc_list = []
  558. for name in sorted(items):
  559. if (name in _exclude or
  560. (_members and name not in _members)):
  561. continue
  562. try:
  563. doc_item = pydoc.getdoc(getattr(self._cls, name))
  564. doc_list.append(
  565. Parameter(name, '', splitlines_x(doc_item)))
  566. except AttributeError:
  567. pass # method doesn't exist
  568. self[field] = doc_list
  569. @property
  570. def methods(self):
  571. if self._cls is None:
  572. return []
  573. return [name for name, func in inspect.getmembers(self._cls)
  574. if ((not name.startswith('_')
  575. or name in self.extra_public_methods)
  576. and isinstance(func, Callable)
  577. and self._is_show_member(name))]
  578. @property
  579. def properties(self):
  580. if self._cls is None:
  581. return []
  582. return [name for name, func in inspect.getmembers(self._cls)
  583. if (not name.startswith('_') and
  584. (func is None or isinstance(func, property) or
  585. inspect.isdatadescriptor(func))
  586. and self._is_show_member(name))]
  587. def _is_show_member(self, name):
  588. if self.show_inherited_members:
  589. return True # show all class members
  590. if name not in self._cls.__dict__:
  591. return False # class member is inherited, we do not show it
  592. return True