pdoc.docstrings
This module handles the conversion of docstring flavors to Markdown.
The conversion from docstring flavors to Markdown is mostly done with regular expressions. This is not particularly beautiful, but good enough for our purposes. The alternative would be to depend on https://github.com/rr-/docstring_parser or a similar project, but that introduces more complexity than we are comfortable with.
If you miss a particular feature for your favorite flavor, contributions are welcome.
That being said, please keep the complexity low and make sure that changes are
accompanied by matching snapshot tests in `test/testdata/`.
"""
This module handles the conversion of docstring flavors to Markdown.

The conversion from docstring flavors to Markdown is mostly done with regular expressions.
This is not particularly beautiful, but good enough for our purposes.
The alternative would be to depend on <https://github.com/rr-/docstring_parser> or a similar project,
but that introduces more complexity than we are comfortable with.

If you miss a particular feature for your favorite flavor, contributions are welcome.
That being said, please keep the complexity low and make sure that changes are
accompanied by matching snapshot tests in `test/testdata/`.
"""
from __future__ import annotations

import inspect
import re
import warnings
from pathlib import Path
from textwrap import dedent, indent

from ._compat import cache


@cache
def convert(docstring: str, docformat: str, source_file: Path | None) -> str:
    """
    Convert `docstring` from `docformat` to Markdown.

    `docformat` is matched case-insensitively by substring. The Google, NumPy and
    reStructuredText flavors all run through `rst()` first; Google and NumPy
    section handling is applied afterwards. Any other format is returned unchanged.
    `source_file` is handed to `rst()`, which uses it to resolve `.. include::` paths.
    """
    docformat = docformat.lower()

    if any(x in docformat for x in ["google", "numpy", "restructuredtext"]):
        docstring = rst(docstring, source_file)

    if "google" in docformat:
        docstring = google(docstring)

    if "numpy" in docformat:
        docstring = numpy(docstring)

    return docstring


def google(docstring: str) -> str:
    """Convert Google-style docstring sections into Markdown."""
    return re.sub(
        r"""
        ^(?P<name>[A-Z][A-Z a-z]+):\n
        (?P<contents>(
            \n        # empty lines
            |         # or
            [ \t]+.+  # lines with indentation
        )+)$
        """,
        _google_section,
        docstring,
        flags=re.VERBOSE | re.MULTILINE,
    )


# Google sections whose body is a list of "name: description" entries.
GOOGLE_LIST_SECTIONS = ["Args", "Raises", "Attributes"]


def _google_section(m: re.Match[str]) -> str:
    """
    Render one section matched by `google()` as Markdown.

    Sections named in `GOOGLE_LIST_SECTIONS` become a bullet list in which the text
    up to the first colon of each entry is set in bold. All other sections are
    emitted as a block quote. The section name becomes a `######` heading.
    """
    name = m.group("name")
    contents = dedent(m.group("contents"))
    if name in GOOGLE_LIST_SECTIONS:
        items = _indented_list(contents)
        contents = ""
        for item in items:
            try:
                # first ":" on the first line
                _, attr, desc = re.split(r"^(.+?:)", item, maxsplit=1)
            except ValueError:
                # No colon in the entry: emit it as a plain bullet.
                # Continuation lines get a three-space prefix so they line up under
                # the " - " bullet; [3:] removes that prefix from the first line.
                contents += " - " + indent(item, "   ")[3:]
            else:
                contents += f" - **{attr}** " + indent(desc, "   ")[3:]
            contents += "\n"
    else:
        contents = indent(contents, "> ", lambda line: True)

    return f"\n###### {name}\n{contents}\n"


def _indented_list(contents: str) -> list[str]:
    r"""
    Convert a list string into individual (dedented) elements. For example,

    foo:
        desc
    bar: int
        more desc
    baz:
        desc
            indented

    returns [
        "foo:\ndesc",
        "bar: int\nmore desc",
        "baz:\ndesc\n    indented",
    ]
    """
    # we expect this to be through cleandoc() already.
    assert not contents.startswith(" ")
    assert not contents.startswith("\n")
    assert "\t" not in contents

    ret: list[str] = []
    for line in contents.splitlines(keepends=True):
        empty = not line.strip()
        indented = line.startswith(" ")
        if not (empty or indented):
            # new section
            ret.append(line)
        else:
            # append to current section
            ret[-1] += line

    return [inspect.cleandoc(x) for x in ret]


def numpy(docstring: str) -> str:
    """Convert NumPy-style docstring sections into Markdown.

    A section is a heading line starting with an uppercase letter that is followed
    by a line of three or more dashes. Every heading becomes a `######` Markdown
    heading. Parameter-like sections and "See Also" are rendered as lists, all
    other sections are only dedented.

    See <https://numpydoc.readthedocs.io/en/latest/format.html> for details.
    """
    sections = re.split(
        r"""
        ^([A-Z][A-Za-z ]+)\n  # a heading
        ---+\n+               # followed by a dashed line
        """,
        docstring,
        flags=re.VERBOSE | re.MULTILINE,
    )
    # With one capture group, re.split returns [preamble, heading, content, heading, content, ...].
    contents = sections[0]
    for heading, content in zip(sections[1::2], sections[2::2]):
        tail = ""
        if content.startswith(" "):
            # If the first line of section content is indented, we consider the section to be finished
            # on the first non-indented line. We take out the rest - the tail - here.
            # There may be no such line (the section runs to the end of the docstring),
            # in which case the split yields a single element and the tail stays empty.
            parts = re.split(r"\n(?![ \n])", content, maxsplit=1)
            # The list helpers below require content that starts at column 0.
            content = dedent(parts[0])
            if len(parts) == 2:
                tail = parts[1]

        if heading in (
            "Parameters",
            "Returns",
            "Yields",
            "Receives",
            "Other Parameters",
            "Raises",
            "Warns",
            "Attributes",
        ):
            contents += f"###### {heading}\n{_numpy_parameters(content)}"
        elif heading == "See Also":
            contents += f"###### {heading}\n{_numpy_seealso(content)}"
        else:
            contents += f"###### {heading}\n{dedent(content)}"
        contents += tail
    return contents


def _numpy_seealso(content: str) -> str:
    """Convert a NumPy-style "See Also" section into Markdown"""
    contents = ""
    for item in _indented_list(content):
        if ":" in item:
            funcstr, desc = item.split(":", maxsplit=1)
            desc = f": {desc}"
        else:
            funcstr, desc = item, ""

        funclist = [f.strip() for f in funcstr.split(" ")]
        funcs = ", ".join(f"`{f}`" for f in funclist if f)
        # Two trailing spaces are a Markdown hard line break, keeping one entry per line.
        contents += f"{funcs}{desc}  \n"
    return contents


def _numpy_parameters(content: str) -> str:
    """
    Convert a NumPy-style parameter section into Markdown

    Entries whose first line has the form `name : type` are rendered with the type
    in parentheses; entries without a colon are rendered as a bare bold name,
    followed by the description if there is one.
    """
    contents = ""
    for item in _indented_list(content):
        m = re.match(r"^(.+):(.+)([\s\S]*)", item)
        if m:
            contents += (
                f" - **{m.group(1).strip()}** ({m.group(2).strip()}):\n"
                f"{indent(m.group(3).strip(), '   ')}\n"
            )
        else:
            if "\n" in item:
                name, desc = item.split("\n", maxsplit=1)
                name = name.strip()
                desc = desc.strip()
            else:
                name, desc = item.strip(), ""

            if desc:
                contents += f" - **{name}**: {desc}\n"
            else:
                contents += f" - **{name}**\n"
    return f"{contents}\n"


def rst(contents: str, source_file: Path | None) -> str:
    """
    Convert reStructuredText elements to Markdown.
    We support the most common elements, but we do not aim to mirror the full complexity of the spec here.
    """
    contents = _rst_admonitions(contents, source_file)
    contents = _rst_links(contents)

    # Code References: :obj:`foo` -> `foo`
    contents = re.sub(
        r"(:py)?:(mod|func|data|const|class|meth|attr|exc|obj):", "", contents
    )

    # Math: :math:`foo` -> \\( foo \\)
    # We don't use $ as that's not enabled by MathJax by default.
    contents = re.sub(r":math:`(.+?)`", r"\\\\( \1 \\\\)", contents)

    contents = _rst_footnotes(contents)

    contents = _rst_fields(contents)

    return contents


def _rst_footnotes(contents: str) -> str:
    """
    Convert reStructuredText footnotes

    Footnote definitions (`.. [1] text`) are rewritten to `[^1]: text` first, then
    references (`[1]_`) to a registered footnote are rewritten to `[^1]`.
    Auto-numbered ids (`#`, `*`) are assigned `fn-1`, `fn-2`, ... in order of
    appearance, counted separately for definitions and for references.
    """
    footnotes: set[str] = set()
    autonum: int

    def register_footnote(m: re.Match[str]) -> str:
        nonlocal autonum
        fn_id = m.group("id")
        if fn_id in "*#":
            fn_id = f"fn-{autonum}"
            autonum += 1
        fn_id = fn_id.lstrip("#*")
        footnotes.add(fn_id)
        content = indent(m.group("content"), "   ").lstrip()
        return f"{m.group('indent')}[^{fn_id}]: {content}"

    # Register footnotes
    autonum = 1
    contents = re.sub(
        r"""
        ^(?P<indent>[ ]*)\.\.[ ]+\[(?P<id>\d+|[#*]\w*)](?P<content>.*
        (
            \n                 # empty lines
            |                  # or
            (?P=indent)[ ]+.+  # lines with indentation
        )*)$
        """,
        register_footnote,
        contents,
        flags=re.MULTILINE | re.VERBOSE,
    )

    def replace_references(m: re.Match[str]) -> str:
        nonlocal autonum
        fn_id = m.group("id")
        if fn_id in "*#":
            fn_id = f"fn-{autonum}"
            autonum += 1
        fn_id = fn_id.lstrip("#*")
        if fn_id in footnotes:
            return f"[^{fn_id}]"
        else:
            # Not a known footnote: leave the text untouched.
            return m.group(0)

    # Restart numbering so the n-th auto-numbered reference maps to the n-th definition.
    autonum = 1
    contents = re.sub(r"\[(?P<id>\d+|[#*]\w*)]_", replace_references, contents)
    return contents


def _rst_links(contents: str) -> str:
    """
    Convert reStructuredText hyperlinks

    Embedded URIs are converted directly. External hyperlink targets
    (`.. _name: http...`) are collected and removed from the text, and matching
    references are then replaced by Markdown links. References without a
    registered target are left as they are.
    """
    links = {}

    def register_link(m: re.Match[str]) -> str:
        # Reference names are compared case-insensitively and without whitespace.
        refid = re.sub(r"\s", "", m.group("id").lower())
        links[refid] = m.group("url")
        return ""

    def replace_link(m: re.Match[str]) -> str:
        text = m.group("id")
        refid = re.sub(r"[\s`]", "", text.lower())
        try:
            return f"[{text.strip('`')}]({links[refid]})"
        except KeyError:
            return m.group(0)

    # Embedded URIs
    contents = re.sub(
        r"`(?P<text>[^`]+)<(?P<url>.+?)>`_", r"[\g<text>](\g<url>)", contents
    )
    # External Hyperlink Targets
    # The dots are escaped so that only the literal ".." explicit-markup start is
    # accepted, not any two characters.
    contents = re.sub(
        r"^\s*\.\.\s+_(?P<id>[^\n:]+):\s*(?P<url>http\S+)",
        register_link,
        contents,
        flags=re.MULTILINE,
    )
    contents = re.sub(r"(?P<id>[A-Za-z0-9_\-.:+]|`[^`]+`)_", replace_link, contents)
    return contents


def _rst_admonitions(contents: str, source_file: Path | None) -> str:
    """
    Convert reStructuredText admonitions - a bit tricky because they may already be indented themselves.
    <https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html>

    `source_file` is the file the text came from; `.. include::` paths are resolved
    relative to its parent directory (or relative to `.` if it is `None`).
    """

    def _rst_admonition(m: re.Match[str]) -> str:
        ind = m.group("indent")
        type = m.group("type")
        val = m.group("val").strip()
        contents = dedent(m.group("contents")).strip()

        if type == "include":
            loc = source_file or Path(".")
            try:
                included = (loc.parent / val).read_text("utf8", "replace")
            except OSError as e:
                # Best effort: warn and continue with an empty include.
                warnings.warn(f"Cannot include {val!r}: {e}")
                included = "\n"
            # Included text may contain admonitions (and further includes) itself.
            included = _rst_admonitions(included, loc.parent / val)
            return indent(included, ind)
        if type == "math":
            return f"{ind}$${val}{contents}$$\n"
        if type in ("note", "warning", "danger"):
            if val:
                heading = f"{ind}###### {val}\n"
            else:
                heading = ""
            return (
                f'{ind}<div class="pdoc-alert pdoc-alert-{type}" markdown="1">\n'
                f"{heading}"
                f"{indent(contents, ind)}\n"
                f"{ind}</div>\n"
            )
        elif type == "versionadded":
            text = f"New in version {val}"
        elif type == "versionchanged":
            text = f"Changed in version {val}"
        elif type == "deprecated":
            text = f"Deprecated since version {val}"
        else:
            text = f"{type} {val}".strip()

        if contents:
            text = f"{ind}*{text}:*\n{indent(contents, ind)}\n\n"
        else:
            text = f"{ind}*{text}.*\n"

        return text

    admonition = "note|warning|danger|versionadded|versionchanged|deprecated|seealso|math|include"
    return re.sub(
        rf"""
        ^(?P<indent>[ ]*)\.\.[ ]+(?P<type>{admonition})::(?P<val>.*)
        (?P<contents>(
            \n                 # empty lines
            |                  # or
            (?P=indent)[ ]+.+  # lines with indentation
        )*)$
        """,
        _rst_admonition,
        contents,
        flags=re.MULTILINE | re.VERBOSE,
    )


def _rst_fields(contents: str) -> str:
    """
    Convert reStructuredText fields to Markdown.
    <https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#rst-field-lists>

    `:param:` and `:raises:` fields become bullet lists, with a heading inserted
    before the first field of each kind. `:return:` becomes a block quote under a
    "Returns" heading. `:type:` and `:rtype:` fields are dropped.
    """

    _has_parameter_section = False
    _has_raises_section = False

    def _rst_field(m: re.Match[str]) -> str:
        type = m["type"]
        body = m["body"]

        if m["name"]:
            name = f"**{m['name'].strip()}**: "
        else:
            name = ""

        if type == "param":
            nonlocal _has_parameter_section
            text = f" - {name}{body}"
            if not _has_parameter_section:
                _has_parameter_section = True
                text = "\n###### Parameters\n" + text
            return text
        elif type == "type":
            return ""  # we expect users to use modern type annotations.
        elif type == "return":
            body = indent(body, "> ", lambda line: True)
            return f"\n###### Returns\n{body}"
        elif type == "rtype":
            return ""  # we expect users to use modern type annotations.
        elif type == "raises":
            nonlocal _has_raises_section
            text = f" - {name}{body}"
            if not _has_raises_section:
                _has_raises_section = True
                text = "\n###### Raises\n" + text
            return text
        else:  # pragma: no cover
            raise AssertionError("unreachable")

    field = "param|type|return|rtype|raises"
    return re.sub(
        rf"""
        ^:(?P<type>{field})(?:[ ]+(?P<name>.+))?:
        (?P<body>.*(
            (?:\n[ ]*)*  # maybe some empty lines followed by
            [ ]+.+       # lines with indentation
        )*(?:\n|$))
        """,
        _rst_field,
        contents,
        flags=re.MULTILINE | re.VERBOSE,
    )
@cache
def convert(docstring: str, docformat: str, source_file: Path | None) -> str:
    """
    Convert `docstring` from `docformat` to Markdown.

    The format name is matched case-insensitively by substring. Google, NumPy and
    reStructuredText docstrings are all passed through `rst()` first; the Google
    and NumPy section converters run afterwards. Other formats are returned as-is.
    """
    flavor = docformat.lower()
    is_google = "google" in flavor
    is_numpy = "numpy" in flavor

    # All three supported flavors may contain reStructuredText markup.
    if is_google or is_numpy or "restructuredtext" in flavor:
        docstring = rst(docstring, source_file)
    if is_google:
        docstring = google(docstring)
    if is_numpy:
        docstring = numpy(docstring)
    return docstring
Convert `docstring` from `docformat` to Markdown.
44def google(docstring: str) -> str: 45 """Convert Google-style docstring sections into Markdown.""" 46 return re.sub( 47 r""" 48 ^(?P<name>[A-Z][A-Z a-z]+):\n 49 (?P<contents>( 50 \n # empty lines 51 | # or 52 [ \t]+.+ # lines with indentation 53 )+)$ 54 """, 55 _google_section, 56 docstring, 57 flags=re.VERBOSE | re.MULTILINE, 58 )
Convert Google-style docstring sections into Markdown.
def numpy(docstring: str) -> str:
    """Convert NumPy-style docstring sections into Markdown.

    A section is a heading line starting with an uppercase letter that is followed
    by a line of three or more dashes. Every heading becomes a `######` Markdown
    heading. Parameter-like sections and "See Also" are rendered as lists, all
    other sections are only dedented.

    See <https://numpydoc.readthedocs.io/en/latest/format.html> for details.
    """
    sections = re.split(
        r"""
        ^([A-Z][A-Za-z ]+)\n  # a heading
        ---+\n+               # followed by a dashed line
        """,
        docstring,
        flags=re.VERBOSE | re.MULTILINE,
    )
    # With one capture group, re.split returns [preamble, heading, content, heading, content, ...].
    contents = sections[0]
    for heading, content in zip(sections[1::2], sections[2::2]):
        tail = ""
        if content.startswith(" "):
            # If the first line of section content is indented, we consider the section to be finished
            # on the first non-indented line. We take out the rest - the tail - here.
            # There may be no such line (the section runs to the end of the docstring),
            # in which case the split yields a single element and the tail stays empty.
            parts = re.split(r"\n(?![ \n])", content, maxsplit=1)
            # The list helpers below require content that starts at column 0.
            content = dedent(parts[0])
            if len(parts) == 2:
                tail = parts[1]

        if heading in (
            "Parameters",
            "Returns",
            "Yields",
            "Receives",
            "Other Parameters",
            "Raises",
            "Warns",
            "Attributes",
        ):
            contents += f"###### {heading}\n{_numpy_parameters(content)}"
        elif heading == "See Also":
            contents += f"###### {heading}\n{_numpy_seealso(content)}"
        else:
            contents += f"###### {heading}\n{dedent(content)}"
        contents += tail
    return contents
Convert NumPy-style docstring sections into Markdown.
See https://numpydoc.readthedocs.io/en/latest/format.html for details.
def rst(contents: str, source_file: Path | None) -> str:
    """
    Convert reStructuredText elements to Markdown.
    We support the most common elements, but we do not aim to mirror the full complexity of the spec here.

    The conversions run in a fixed order: admonitions, hyperlinks, code
    references, inline math, footnotes and finally field lists.
    """
    markdown = _rst_links(_rst_admonitions(contents, source_file))

    # Code References: :obj:`foo` -> `foo`
    role_prefix = r"(:py)?:(mod|func|data|const|class|meth|attr|exc|obj):"
    markdown = re.sub(role_prefix, "", markdown)

    # Math: :math:`foo` -> \\( foo \\)
    # We don't use $ as that's not enabled by MathJax by default.
    markdown = re.sub(r":math:`(.+?)`", r"\\\\( \1 \\\\)", markdown)

    return _rst_fields(_rst_footnotes(markdown))
Convert reStructuredText elements to Markdown. We support the most common elements, but we do not aim to mirror the full complexity of the spec here.