pdoc.docstrings
This module handles the conversion of docstring flavors to Markdown.
The conversion from docstring flavors to Markdown is mostly done with regular expressions. This is not particularly beautiful, but good enough for our purposes. The alternative would be to depend on https://github.com/rr-/docstring_parser or a similar project, but that introduces more complexity than we are comfortable with.
If you miss a particular feature for your favorite flavor, contributions are welcome.
That being said, please keep the complexity low and make sure that changes are accompanied by matching snapshot tests in `test/testdata/`.
1""" 2This module handles the conversion of docstring flavors to Markdown. 3 4The conversion from docstring flavors to Markdown is mostly done with regular expressions. 5This is not particularly beautiful, but good enough for our purposes. 6The alternative would be to depend on <https://github.com/rr-/docstring_parser> or a similar project, 7but that introduces more complexity than we are comfortable with. 8 9If you miss a particular feature for your favorite flavor, contributions are welcome. 10That being said, please keep the complexity low and make sure that changes are 11accompanied by matching snapshot tests in `test/testdata/`. 12""" 13 14from __future__ import annotations 15 16import base64 17import inspect 18import mimetypes 19import os 20from pathlib import Path 21import re 22from textwrap import dedent 23from textwrap import indent 24import warnings 25 26from ._compat import cache 27 28 29@cache 30def convert(docstring: str, docformat: str, source_file: Path | None) -> str: 31 """ 32 Convert `docstring` from `docformat` to Markdown. 
33 """ 34 docformat = docformat.lower() 35 36 if any(x in docformat for x in ["google", "numpy", "restructuredtext"]): 37 docstring = rst(docstring, source_file) 38 39 if "google" in docformat: 40 docstring = google(docstring) 41 42 if "numpy" in docformat: 43 docstring = numpy(docstring) 44 45 if source_file is not None and os.environ.get("PDOC_EMBED_IMAGES") != "0": 46 docstring = embed_images(docstring, source_file) 47 48 return docstring 49 50 51def embed_images(docstring: str, source_file: Path) -> str: 52 def embed_local_image(m: re.Match) -> str: 53 image_path = source_file.parent / m["href"] 54 try: 55 image_data = image_path.read_bytes() 56 image_mime = mimetypes.guess_type(image_path)[0] 57 except Exception: 58 return m[0] 59 else: 60 data = base64.b64encode(image_data).decode() 61 return f"![{m['alt']}](data:{image_mime};base64,{data})" 62 63 return re.sub( 64 r"!\[\s*(?P<alt>.*?)\s*]\(\s*(?P<href>.+?)\s*\)", 65 embed_local_image, 66 docstring, 67 ) 68 # TODO: Could probably do more here, e.g. support rST or raw HTML replacements. 69 70 71def google(docstring: str) -> str: 72 """Convert Google-style docstring sections into Markdown.""" 73 return re.sub( 74 r""" 75 ^(?P<name>[A-Z][A-Z a-z]+):\n 76 (?P<contents>( 77 \n # empty lines 78 | # or 79 [ \t]+.+ # lines with indentation 80 )+)$ 81 """, 82 _google_section, 83 docstring, 84 flags=re.VERBOSE | re.MULTILINE, 85 ) 86 87 88GOOGLE_LIST_SECTIONS = ["Args", "Raises", "Attributes"] 89"""Section headers listed in the official Google docstring style guide.""" 90 91GOOGLE_LIST_SECTION_ALIASES = { 92 "Parameters": "Args", 93 "Params": "Args", 94 "Arguments": "Args", 95} 96""" 97Alternative section headers that are not listed in the official Google 98docstring style guide but that we recognize as sections containing lists 99nevertheless. 
100""" 101 102 103def _google_section(m: re.Match[str]) -> str: 104 name = m.group("name") 105 contents = dedent(m.group("contents")).lstrip() 106 107 if name in GOOGLE_LIST_SECTION_ALIASES: 108 name = GOOGLE_LIST_SECTION_ALIASES[name] 109 110 if name in GOOGLE_LIST_SECTIONS: 111 items = _indented_list(contents) 112 contents = "" 113 for item in items: 114 try: 115 # first ":" on the first line 116 _, attr, desc = re.split(r"^(.+?:)", item, maxsplit=1) 117 except ValueError: 118 contents += " - " + indent(item, " ")[3:] 119 else: 120 contents += f" - **{attr}** " + indent(desc, " ")[3:] 121 contents += "\n" 122 else: 123 contents = indent(contents, "> ", lambda line: True) 124 125 if name == "Args": 126 name = "Arguments" 127 128 return f"\n###### {name}:\n{contents}\n" 129 130 131def _indented_list(contents: str) -> list[str]: 132 """ 133 Convert a list string into individual (dedented) elements. For example, 134 135 foo: 136 desc 137 bar: int 138 more desc 139 baz: 140 desc 141 indented 142 143 returns [ 144 "foo:\ndesc", 145 "bar: int\nmore desc", 146 "baz:\ndesc\n indented", 147 ] 148 """ 149 # we expect this to be through cleandoc() already. 150 assert not contents.startswith(" "), contents 151 assert not contents.startswith("\n"), contents 152 153 ret: list[str] = [] 154 for line in contents.splitlines(keepends=True): 155 empty = not line.strip() 156 indented = line.startswith(" ") 157 if not (empty or indented): 158 # new section 159 ret.append(line) 160 else: 161 # append to current section 162 ret[-1] += line 163 164 return [inspect.cleandoc(x) for x in ret] 165 166 167def numpy(docstring: str) -> str: 168 """Convert NumPy-style docstring sections into Markdown. 169 170 See <https://numpydoc.readthedocs.io/en/latest/format.html> for details. 
171 """ 172 sections = re.split( 173 r""" 174 ^([A-Z][A-Za-z ]+)\n # a heading 175 ---+\n+ # followed by a dashed line 176 """, 177 docstring, 178 flags=re.VERBOSE | re.MULTILINE, 179 ) 180 contents = sections[0] 181 for heading, content in zip(sections[1::2], sections[2::2]): 182 if content.startswith(" "): 183 # If the first line of section content is indented, we consider the section to be finished 184 # on the first non-indented line. We take out the rest - the tail - here. 185 content, tail = re.split(r"\n(?![ \n])", content, maxsplit=1) 186 else: 187 tail = "" 188 189 if heading in ( 190 "Parameters", 191 "Returns", 192 "Yields", 193 "Receives", 194 "Other Parameters", 195 "Raises", 196 "Warns", 197 "Attributes", 198 ): 199 contents += f"###### {heading}\n{_numpy_parameters(content)}" 200 elif heading == "See Also": 201 contents += f"###### {heading}\n{_numpy_seealso(content)}" 202 else: 203 contents += f"###### {heading}\n{dedent(content)}" 204 contents += tail 205 return contents 206 207 208def _numpy_seealso(content: str) -> str: 209 """Convert a NumPy-style "See Also" section into Markdown""" 210 contents = "" 211 for item in _indented_list(content): 212 if ":" in item: 213 funcstr, desc = item.split(":", maxsplit=1) 214 desc = f": {desc}" 215 else: 216 funcstr, desc = item, "" 217 218 funclist = [f.strip() for f in funcstr.split(" ")] 219 funcs = ", ".join(f"`{f}`" for f in funclist if f) 220 contents += f"{funcs}{desc} \n" 221 return contents 222 223 224def _numpy_parameters(content: str) -> str: 225 """Convert a NumPy-style parameter section into Markdown""" 226 contents = "" 227 for item in _indented_list(content): 228 m = re.match(r"^(.+):(.+)([\s\S]*)", item) 229 if m: 230 contents += ( 231 f" - **{m.group(1).strip()}** ({m.group(2).strip()}):\n" 232 f"{indent(m.group(3).strip(), ' ')}\n" 233 ) 234 else: 235 if "\n" in item: 236 name, desc = item.split("\n", maxsplit=1) 237 name = name.strip() 238 desc = desc.strip() 239 else: 240 name, desc = 
item.strip(), "" 241 242 if desc: 243 contents += f" - **{name}**: {desc}\n" 244 else: 245 contents += f" - **{name}**\n" 246 return f"{contents}\n" 247 248 249def rst(contents: str, source_file: Path | None) -> str: 250 """ 251 Convert reStructuredText elements to Markdown. 252 We support the most common elements, but we do not aim to mirror the full complexity of the spec here. 253 """ 254 contents = _rst_admonitions(contents, source_file) 255 contents = _rst_links(contents) 256 257 def replace_reference(m): 258 _, kind, name = m.groups() 259 if kind in ("meth", "func"): 260 return f"`{name}()`" 261 else: 262 return f"`{name}`" 263 264 # Code References: :obj:`foo` -> `foo` 265 contents = re.sub( 266 r"(:py)?:(mod|func|data|const|class|meth|attr|exc|obj):`([^`]+)`", 267 replace_reference, 268 contents, 269 ) 270 271 # Math: :math:`foo` -> \\( foo \\) 272 # We don't use $ as that's not enabled by MathJax by default. 273 contents = re.sub(r":math:`(.+?)`", r"\\\\( \1 \\\\)", contents) 274 275 contents = _rst_footnotes(contents) 276 277 contents = _rst_fields(contents) 278 279 return contents 280 281 282def _rst_footnotes(contents: str) -> str: 283 """Convert reStructuredText footnotes""" 284 footnotes: set[str] = set() 285 autonum: int 286 287 def register_footnote(m: re.Match[str]) -> str: 288 nonlocal autonum 289 fn_id = m.group("id") 290 if fn_id in "*#": 291 fn_id = f"fn-{autonum}" 292 autonum += 1 293 fn_id = fn_id.lstrip("#*") 294 footnotes.add(fn_id) 295 content = indent(m.group("content"), " ").lstrip() 296 return f"{m.group('indent')}[^{fn_id}]: {content}" 297 298 # Register footnotes 299 autonum = 1 300 contents = re.sub( 301 r""" 302 ^(?P<indent>[ ]*)\.\.[ ]+\[(?P<id>\d+|[#*]\w*)](?P<content>.* 303 ( 304 \n # empty lines 305 | # or 306 (?P=indent)[ ]+.+ # lines with indentation 307 )*)$ 308 """, 309 register_footnote, 310 contents, 311 flags=re.MULTILINE | re.VERBOSE, 312 ) 313 314 def replace_references(m: re.Match[str]) -> str: 315 nonlocal autonum 316 
fn_id = m.group("id") 317 if fn_id in "*#": 318 fn_id = f"fn-{autonum}" 319 autonum += 1 320 fn_id = fn_id.lstrip("#*") 321 if fn_id in footnotes: 322 return f"[^{fn_id}]" 323 else: 324 return m.group(0) 325 326 autonum = 1 327 contents = re.sub(r"\[(?P<id>\d+|[#*]\w*)]_", replace_references, contents) 328 return contents 329 330 331def _rst_links(contents: str) -> str: 332 """Convert reStructuredText hyperlinks""" 333 links = {} 334 335 def register_link(m: re.Match[str]) -> str: 336 refid = re.sub(r"\s", "", m.group("id").lower()) 337 links[refid] = m.group("url") 338 return "" 339 340 def replace_link(m: re.Match[str]) -> str: 341 text = m.group("id") 342 refid = re.sub(r"[\s`]", "", text.lower()) 343 try: 344 return f"[{text.strip('`')}]({links[refid]})" 345 except KeyError: 346 return m.group(0) 347 348 # Embedded URIs 349 contents = re.sub( 350 r"`(?P<text>[^`]+)<(?P<url>.+?)>`_", r"[\g<text>](\g<url>)", contents 351 ) 352 # External Hyperlink Targets 353 contents = re.sub( 354 r"^\s*..\s+_(?P<id>[^\n:]+):\s*(?P<url>http\S+)", 355 register_link, 356 contents, 357 flags=re.MULTILINE, 358 ) 359 contents = re.sub(r"(?P<id>[A-Za-z0-9_\-.:+]|`[^`]+`)_", replace_link, contents) 360 return contents 361 362 363def _rst_extract_options(contents: str) -> tuple[str, dict[str, str]]: 364 """ 365 Extract options from the beginning of reStructuredText directives. 366 367 Return the trimmed content and a dict of options. 
368 """ 369 options = {} 370 while match := re.match(r"^\s*:(.+?):(.*)([\s\S]*)", contents): 371 key, value, contents = match.groups() 372 options[key] = value.strip() 373 374 return contents, options 375 376 377def _rst_include_trim(contents: str, options: dict[str, str]) -> str: 378 """ 379 <https://docutils.sourceforge.io/docs/ref/rst/directives.html#include-options> 380 """ 381 if "end-line" in options or "start-line" in options: 382 lines = contents.splitlines() 383 if i := options.get("end-line"): 384 lines = lines[: int(i)] 385 if i := options.get("start-line"): 386 lines = lines[int(i) :] 387 contents = "\n".join(lines) 388 if x := options.get("end-before"): 389 contents = contents[: contents.index(x)] 390 if x := options.get("start-after"): 391 contents = contents[contents.index(x) + len(x) :] 392 return contents 393 394 395def _rst_admonitions(contents: str, source_file: Path | None) -> str: 396 """ 397 Convert reStructuredText admonitions - a bit tricky because they may already be indented themselves. 
398 <https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html> 399 """ 400 401 def _rst_admonition(m: re.Match[str]) -> str: 402 ind = m.group("indent") 403 type = m.group("type") 404 val = m.group("val").strip() 405 contents = dedent(m.group("contents")).strip() 406 contents, options = _rst_extract_options(contents) 407 408 if type == "include": 409 loc = source_file or Path(".") 410 try: 411 included = (loc.parent / val).read_text("utf8", "replace") 412 except OSError as e: 413 warnings.warn(f"Cannot include {val!r}: {e}") 414 included = "\n" 415 try: 416 included = _rst_include_trim(included, options) + "\n" 417 except ValueError as e: 418 warnings.warn(f"Failed to process include options for {val!r}: {e}") 419 included = _rst_admonitions(included, loc.parent / val) 420 included = embed_images(included, loc.parent / val) 421 return indent(included, ind) 422 if type == "math": 423 return f"{ind}$${val}{contents}$$\n" 424 if type in ("note", "warning", "danger"): 425 if val: 426 heading = f"{ind}###### {val}\n" 427 else: 428 heading = "" 429 return ( 430 f'{ind}<div class="alert {type}" markdown="1">\n' 431 f"{heading}" 432 f"{indent(contents, ind)}\n" 433 f"{ind}</div>\n" 434 ) 435 if type == "code-block": 436 return f"{ind}```{val}\n{contents}\n```\n" 437 if type == "versionadded": 438 text = f"New in version {val}" 439 elif type == "versionchanged": 440 text = f"Changed in version {val}" 441 elif type == "deprecated": 442 text = f"Deprecated since version {val}" 443 else: 444 text = f"{type} {val}".strip() 445 446 if contents: 447 text = f"{ind}*{text}:*\n{indent(contents, ind)}\n\n" 448 else: 449 text = f"{ind}*{text}.*\n" 450 451 return text 452 453 admonition = "note|warning|danger|versionadded|versionchanged|deprecated|seealso|math|include|code-block" 454 return re.sub( 455 rf""" 456 ^(?P<indent>[ ]*)\.\.[ ]+(?P<type>{admonition})::(?P<val>.*) 457 (?P<contents>( 458 \n # empty lines 459 | # or 460 (?P=indent)[ ]+.+ # lines with 
indentation 461 )*)$ 462 """, 463 _rst_admonition, 464 contents, 465 flags=re.MULTILINE | re.VERBOSE, 466 ) 467 468 469def _rst_fields(contents: str) -> str: 470 """ 471 Convert reStructuredText fields to Markdown. 472 <https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#rst-field-lists> 473 """ 474 475 _has_parameter_section = False 476 _has_raises_section = False 477 478 def _rst_field(m: re.Match[str]) -> str: 479 type = m["type"] 480 body = m["body"] 481 482 if m["name"]: 483 name = f"**{m['name'].strip()}**: " 484 else: 485 name = "" 486 487 if type == "param": 488 nonlocal _has_parameter_section 489 text = f" - {name}{body}" 490 if not _has_parameter_section: 491 _has_parameter_section = True 492 text = "\n###### Parameters\n" + text 493 return text 494 elif type == "type": 495 return "" # we expect users to use modern type annotations. 496 elif type == "return": 497 body = indent(body, "> ", lambda line: True) 498 return f"\n###### Returns\n{body}" 499 elif type == "rtype": 500 return "" # we expect users to use modern type annotations. 501 elif type == "raises": 502 nonlocal _has_raises_section 503 text = f" - {name}{body}" 504 if not _has_raises_section: 505 _has_raises_section = True 506 text = "\n###### Raises\n" + text 507 return text 508 else: # pragma: no cover 509 raise AssertionError("unreachable") 510 511 field = "param|type|return|rtype|raises" 512 return re.sub( 513 rf""" 514 ^:(?P<type>{field})(?:[ ]+(?P<name>.+))?: 515 (?P<body>.*( 516 (?:\n[ ]*)* # maybe some empty lines followed by 517 [ ]+.+ # lines with indentation 518 )*(?:\n|$)) 519 """, 520 _rst_field, 521 contents, 522 flags=re.MULTILINE | re.VERBOSE, 523 )
@cache
def convert(docstring: str, docformat: str, source_file: Path | None) -> str:
    """
    Convert `docstring` from `docformat` to Markdown.

    `docformat` is matched case-insensitively by substring. The
    reStructuredText pass runs for the "google", "numpy" and
    "restructuredtext" flavors; the Google and NumPy passes run afterwards
    for their respective flavors. Any other format is returned without a
    flavor conversion.

    When `source_file` is given and the `PDOC_EMBED_IMAGES` environment
    variable is not set to "0", the result is additionally passed through
    `embed_images`.
    """
    flavor = docformat.lower()

    # Google and NumPy docstrings may contain rST elements, so they get the rST pass first.
    rst_flavors = ("google", "numpy", "restructuredtext")
    if any(name in flavor for name in rst_flavors):
        docstring = rst(docstring, source_file)

    if "google" in flavor:
        docstring = google(docstring)

    if "numpy" in flavor:
        docstring = numpy(docstring)

    # Embedding needs a source file to resolve paths against and can be disabled via the environment.
    if source_file is None or os.environ.get("PDOC_EMBED_IMAGES") == "0":
        return docstring

    return embed_images(docstring, source_file)
Convert `docstring` from `docformat` to Markdown.
def embed_images(docstring: str, source_file: Path) -> str:
    """
    Replace Markdown image links with inline base64 `data:` URIs.

    Each `![alt](href)` occurrence is resolved relative to the directory of
    `source_file`. If the file can be read, the link is replaced with a data
    URI carrying the MIME type guessed from the file name; if anything goes
    wrong, the original Markdown is left untouched.
    """

    def inline_image(match: re.Match) -> str:
        target = source_file.parent / match["href"]
        try:
            raw = target.read_bytes()
            mime = mimetypes.guess_type(target)[0]
        except Exception:
            # Best effort: anything unreadable keeps its original link.
            return match[0]
        encoded = base64.b64encode(raw).decode()
        return f"![{match['alt']}](data:{mime};base64,{encoded})"

    image_link = r"!\[\s*(?P<alt>.*?)\s*]\(\s*(?P<href>.+?)\s*\)"
    return re.sub(image_link, inline_image, docstring)
    # TODO: Could probably do more here, e.g. support rST or raw HTML replacements.
def google(docstring: str) -> str:
    """
    Convert Google-style docstring sections into Markdown.

    A section is a capitalized heading followed by a colon on its own line
    and at least one empty or indented line. Every such section is rewritten
    by `_google_section`; all other text is left as it is.
    """
    section = re.compile(
        r"""
        ^(?P<name>[A-Z][A-Z a-z]+):\n
        (?P<contents>(
            \n          # empty lines
            |           # or
            [ \t]+.+    # lines with indentation
        )+)$
        """,
        re.VERBOSE | re.MULTILINE,
    )
    return section.sub(_google_section, docstring)
Convert Google-style docstring sections into Markdown.
Section headers listed in the official Google docstring style guide.
Alternative section headers that are not listed in the official Google docstring style guide but that we recognize as sections containing lists nevertheless.
def numpy(docstring: str) -> str:
    """Convert NumPy-style docstring sections into Markdown.

    See <https://numpydoc.readthedocs.io/en/latest/format.html> for details.

    The docstring is split on headings that are underlined with a dashed line.
    Parameter-like sections are rendered by `_numpy_parameters`, "See Also"
    by `_numpy_seealso`, and every other section is emitted as dedented text
    below a level-6 Markdown heading. Text before the first heading is kept
    unchanged.
    """
    sections = re.split(
        r"""
        ^([A-Z][A-Za-z ]+)\n  # a heading
        ---+\n+               # followed by a dashed line
        """,
        docstring,
        flags=re.VERBOSE | re.MULTILINE,
    )
    # re.split with one capture group yields [preamble, heading, content, heading, content, ...].
    contents = sections[0]
    for heading, content in zip(sections[1::2], sections[2::2]):
        tail = ""
        if content.startswith(" "):
            # If the first line of section content is indented, we consider the section to be finished
            # on the first non-indented line. We take out the rest - the tail - here.
            # There may be no such line (the section runs to the end of the docstring), in which
            # case re.split returns a single element and the tail stays empty.
            content, *rest = re.split(r"\n(?![ \n])", content, maxsplit=1)
            if rest:
                tail = rest[0]
            # The list converters expect content that starts at column zero.
            content = dedent(content)

        if heading in (
            "Parameters",
            "Returns",
            "Yields",
            "Receives",
            "Other Parameters",
            "Raises",
            "Warns",
            "Attributes",
        ):
            contents += f"###### {heading}\n{_numpy_parameters(content)}"
        elif heading == "See Also":
            contents += f"###### {heading}\n{_numpy_seealso(content)}"
        else:
            contents += f"###### {heading}\n{dedent(content)}"
        # NOTE(review): the newline that separated content and tail was consumed by the split
        # and is not re-inserted here; confirm this is intended for free-text sections.
        contents += tail
    return contents
Convert NumPy-style docstring sections into Markdown.
See https://numpydoc.readthedocs.io/en/latest/format.html for details.
def rst(contents: str, source_file: Path | None) -> str:
    """
    Convert reStructuredText elements to Markdown.
    We support the most common elements, but we do not aim to mirror the full complexity of the spec here.

    The passes run in a fixed order: admonitions, hyperlinks, code
    references, inline math, footnotes and finally field lists.
    `source_file` is forwarded to the admonition pass.
    """

    def code_reference(match):
        # Group 1 is the optional ":py" prefix, which does not affect the output.
        kind, target = match.group(2), match.group(3)
        call_suffix = "()" if kind in ("meth", "func") else ""
        return f"`{target}{call_suffix}`"

    text = _rst_admonitions(contents, source_file)
    text = _rst_links(text)

    # Code References: :obj:`foo` -> `foo`
    text = re.sub(
        r"(:py)?:(mod|func|data|const|class|meth|attr|exc|obj):`([^`]+)`",
        code_reference,
        text,
    )

    # Math: :math:`foo` -> \\( foo \\)
    # We don't use $ as that's not enabled by MathJax by default.
    text = re.sub(r":math:`(.+?)`", r"\\\\( \1 \\\\)", text)

    text = _rst_footnotes(text)
    return _rst_fields(text)
Convert reStructuredText elements to Markdown. We support the most common elements, but we do not aim to mirror the full complexity of the spec here.