pdoc.docstrings
This module handles the conversion of docstring flavors to Markdown.
The conversion from docstring flavors to Markdown is mostly done with regular expressions. This is not particularly beautiful, but good enough for our purposes. The alternative would be to depend on https://github.com/rr-/docstring_parser or a similar project, but that introduces more complexity than we are comfortable with.
If you miss a particular feature for your favorite flavor, contributions are welcome.
That being said, please keep the complexity low and make sure that changes are
accompanied by matching snapshot tests in `test/testdata/`.
1""" 2This module handles the conversion of docstring flavors to Markdown. 3 4The conversion from docstring flavors to Markdown is mostly done with regular expressions. 5This is not particularly beautiful, but good enough for our purposes. 6The alternative would be to depend on <https://github.com/rr-/docstring_parser> or a similar project, 7but that introduces more complexity than we are comfortable with. 8 9If you miss a particular feature for your favorite flavor, contributions are welcome. 10That being said, please keep the complexity low and make sure that changes are 11accompanied by matching snapshot tests in `test/testdata/`. 12""" 13 14from __future__ import annotations 15 16import base64 17import inspect 18import mimetypes 19import os 20from pathlib import Path 21import re 22from textwrap import dedent 23from textwrap import indent 24import warnings 25 26from ._compat import cache 27 28 29@cache 30def convert(docstring: str, docformat: str, source_file: Path | None) -> str: 31 """ 32 Convert `docstring` from `docformat` to Markdown. 
33 """ 34 docformat = docformat.lower() 35 36 if any(x in docformat for x in ["google", "numpy", "restructuredtext"]): 37 docstring = rst(docstring, source_file) 38 39 if "google" in docformat: 40 docstring = google(docstring) 41 42 if "numpy" in docformat: 43 docstring = numpy(docstring) 44 45 if source_file is not None and os.environ.get("PDOC_EMBED_IMAGES") != "0": 46 docstring = embed_images(docstring, source_file) 47 48 return docstring 49 50 51def embed_images(docstring: str, source_file: Path) -> str: 52 def embed_local_image(m: re.Match) -> str: 53 image_path = source_file.parent / m["href"] 54 try: 55 image_data = image_path.read_bytes() 56 image_mime = mimetypes.guess_type(image_path)[0] 57 except Exception: 58 return m[0] 59 else: 60 data = base64.b64encode(image_data).decode() 61 return f"![{m['alt']}](data:{image_mime};base64,{data})" 62 63 return re.sub( 64 r"!\[\s*(?P<alt>.*?)\s*]\(\s*(?P<href>.+?)\s*\)", 65 embed_local_image, 66 docstring, 67 ) 68 # TODO: Could probably do more here, e.g. support rST or raw HTML replacements. 69 70 71def google(docstring: str) -> str: 72 """Convert Google-style docstring sections into Markdown.""" 73 return re.sub( 74 r""" 75 ^(?P<name>[A-Z][A-Z a-z]+):\n 76 (?P<contents>( 77 \n # empty lines 78 | # or 79 [ \t]+.+ # lines with indentation 80 )+)$ 81 """, 82 _google_section, 83 docstring, 84 flags=re.VERBOSE | re.MULTILINE, 85 ) 86 87 88GOOGLE_LIST_SECTIONS = ["Args", "Raises", "Attributes"] 89"""Section headers listed in the official Google docstring style guide.""" 90 91GOOGLE_LIST_SECTION_ALIASES = { 92 "Parameters": "Args", 93 "Params": "Args", 94 "Arguments": "Args", 95} 96""" 97Alternative section headers that are not listed in the official Google 98docstring style guide but that we recognize as sections containing lists 99nevertheless. 
100""" 101 102 103def _google_section(m: re.Match[str]) -> str: 104 name = m.group("name") 105 contents = dedent(m.group("contents")).lstrip() 106 107 if name in GOOGLE_LIST_SECTION_ALIASES: 108 name = GOOGLE_LIST_SECTION_ALIASES[name] 109 110 if name in GOOGLE_LIST_SECTIONS: 111 items = _indented_list(contents) 112 contents = "" 113 for item in items: 114 try: 115 # first ":" on the first line 116 _, attr, desc = re.split(r"^(.+?:)", item, maxsplit=1) 117 except ValueError: 118 contents += " - " + indent(item, " ")[3:] 119 else: 120 contents += f" - **{attr}** " + indent(desc, " ")[3:] 121 contents += "\n" 122 else: 123 contents = indent(contents, "> ", lambda line: True) 124 125 if name == "Args": 126 name = "Arguments" 127 128 return f"\n###### {name}:\n{contents}\n" 129 130 131def _indented_list(contents: str) -> list[str]: 132 """ 133 Convert a list string into individual (dedented) elements. For example, 134 135 foo: 136 desc 137 bar: int 138 more desc 139 baz: 140 desc 141 indented 142 143 returns [ 144 "foo:\ndesc", 145 "bar: int\nmore desc", 146 "baz:\ndesc\n indented", 147 ] 148 """ 149 # we expect this to be through cleandoc() already. 150 assert not contents.startswith(" "), contents 151 assert not contents.startswith("\n"), contents 152 153 ret: list[str] = [] 154 for line in contents.splitlines(keepends=True): 155 empty = not line.strip() 156 indented = line.startswith(" ") 157 if not (empty or indented): 158 # new section 159 ret.append(line) 160 else: 161 # append to current section 162 ret[-1] += line 163 164 return [inspect.cleandoc(x) for x in ret] 165 166 167def numpy(docstring: str) -> str: 168 """Convert NumPy-style docstring sections into Markdown. 169 170 See <https://numpydoc.readthedocs.io/en/latest/format.html> for details. 
171 """ 172 sections = re.split( 173 r""" 174 ^([A-Z][A-Za-z ]+)\n # a heading 175 ---+\n+ # followed by a dashed line 176 """, 177 docstring, 178 flags=re.VERBOSE | re.MULTILINE, 179 ) 180 contents = sections[0] 181 for heading, content in zip(sections[1::2], sections[2::2]): 182 if content.startswith(" "): 183 # If the first line of section content is indented, we consider the section to be finished 184 # on the first non-indented line. We take out the rest - the tail - here. 185 content, tail = re.split(r"\n(?![ \n])", content, maxsplit=1) 186 else: 187 tail = "" 188 189 if heading in ( 190 "Parameters", 191 "Returns", 192 "Yields", 193 "Receives", 194 "Other Parameters", 195 "Raises", 196 "Warns", 197 "Attributes", 198 ): 199 contents += f"###### {heading}\n{_numpy_parameters(content)}" 200 elif heading == "See Also": 201 contents += f"###### {heading}\n{_numpy_seealso(content)}" 202 else: 203 contents += f"###### {heading}\n{dedent(content)}" 204 contents += tail 205 return contents 206 207 208def _numpy_seealso(content: str) -> str: 209 """Convert a NumPy-style "See Also" section into Markdown""" 210 contents = "" 211 for item in _indented_list(content): 212 if ":" in item: 213 funcstr, desc = item.split(":", maxsplit=1) 214 desc = f": {desc}" 215 else: 216 funcstr, desc = item, "" 217 218 funclist = [f.strip() for f in funcstr.split(" ")] 219 funcs = ", ".join(f"`{f}`" for f in funclist if f) 220 contents += f"{funcs}{desc} \n" 221 return contents 222 223 224def _numpy_parameters(content: str) -> str: 225 """Convert a NumPy-style parameter section into Markdown""" 226 contents = "" 227 for item in _indented_list(content): 228 m = re.match(r"^(.+):(.+)([\s\S]*)", item) 229 if m: 230 contents += ( 231 f" - **{m.group(1).strip()}** ({m.group(2).strip()}):\n" 232 f"{indent(m.group(3).strip(), ' ')}\n" 233 ) 234 else: 235 if "\n" in item: 236 name, desc = item.split("\n", maxsplit=1) 237 name = name.strip() 238 desc = desc.strip() 239 else: 240 name, desc = 
item.strip(), "" 241 242 if desc: 243 contents += f" - **{name}**: {desc}\n" 244 else: 245 contents += f" - **{name}**\n" 246 return f"{contents}\n" 247 248 249def rst(contents: str, source_file: Path | None) -> str: 250 """ 251 Convert reStructuredText elements to Markdown. 252 We support the most common elements, but we do not aim to mirror the full complexity of the spec here. 253 """ 254 contents = _rst_admonitions(contents, source_file) 255 contents = _rst_links(contents) 256 257 def replace_reference(m): 258 _, kind, name = m.groups() 259 if kind in ("meth", "func"): 260 return f"`{name}()`" 261 else: 262 return f"`{name}`" 263 264 # Code References: :obj:`foo` -> `foo` 265 contents = re.sub( 266 r"(:py)?:(mod|func|data|const|class|meth|attr|exc|obj):`([^`]+)`", 267 replace_reference, 268 contents, 269 ) 270 271 # Math: :math:`foo` -> \\( foo \\) 272 # We don't use $ as that's not enabled by MathJax by default. 273 contents = re.sub(r":math:`(.+?)`", r"\\\\( \1 \\\\)", contents) 274 275 contents = _rst_footnotes(contents) 276 277 contents = _rst_fields(contents) 278 279 return contents 280 281 282def _rst_footnotes(contents: str) -> str: 283 """Convert reStructuredText footnotes""" 284 footnotes: set[str] = set() 285 autonum: int 286 287 def register_footnote(m: re.Match[str]) -> str: 288 nonlocal autonum 289 fn_id = m.group("id") 290 if fn_id in "*#": 291 fn_id = f"fn-{autonum}" 292 autonum += 1 293 fn_id = fn_id.lstrip("#*") 294 footnotes.add(fn_id) 295 content = indent(m.group("content"), " ").lstrip() 296 return f"{m.group('indent')}[^{fn_id}]: {content}" 297 298 # Register footnotes 299 autonum = 1 300 contents = re.sub( 301 r""" 302 ^(?P<indent>[ ]*)\.\.[ ]+\[(?P<id>\d+|[#*]\w*)](?P<content>.* 303 ( 304 \n # empty lines 305 | # or 306 (?P=indent)[ ]+.+ # lines with indentation 307 )*)$ 308 """, 309 register_footnote, 310 contents, 311 flags=re.MULTILINE | re.VERBOSE, 312 ) 313 314 def replace_references(m: re.Match[str]) -> str: 315 nonlocal autonum 316 
fn_id = m.group("id") 317 if fn_id in "*#": 318 fn_id = f"fn-{autonum}" 319 autonum += 1 320 fn_id = fn_id.lstrip("#*") 321 if fn_id in footnotes: 322 return f"[^{fn_id}]" 323 else: 324 return m.group(0) 325 326 autonum = 1 327 contents = re.sub(r"\[(?P<id>\d+|[#*]\w*)]_", replace_references, contents) 328 return contents 329 330 331def _rst_links(contents: str) -> str: 332 """Convert reStructuredText hyperlinks""" 333 links = {} 334 335 def register_link(m: re.Match[str]) -> str: 336 refid = re.sub(r"\s", "", m.group("id").lower()) 337 links[refid] = m.group("url") 338 return "" 339 340 def replace_link(m: re.Match[str]) -> str: 341 text = m.group("id") 342 refid = re.sub(r"[\s`]", "", text.lower()) 343 try: 344 return f"[{text.strip('`')}]({links[refid]})" 345 except KeyError: 346 return m.group(0) 347 348 # Embedded URIs 349 contents = re.sub( 350 r"`(?P<text>[^`]+)<(?P<url>.+?)>`_", r"[\g<text>](\g<url>)", contents 351 ) 352 # External Hyperlink Targets 353 contents = re.sub( 354 r"^\s*..\s+_(?P<id>[^\n:]+):\s*(?P<url>http\S+)", 355 register_link, 356 contents, 357 flags=re.MULTILINE, 358 ) 359 contents = re.sub(r"(?P<id>[A-Za-z0-9_\-.:+]|`[^`]+`)_", replace_link, contents) 360 return contents 361 362 363def _rst_admonitions(contents: str, source_file: Path | None) -> str: 364 """ 365 Convert reStructuredText admonitions - a bit tricky because they may already be indented themselves. 
366 <https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html> 367 """ 368 369 def _rst_admonition(m: re.Match[str]) -> str: 370 ind = m.group("indent") 371 type = m.group("type") 372 val = m.group("val").strip() 373 contents = dedent(m.group("contents")).strip() 374 375 if type == "include": 376 loc = source_file or Path(".") 377 try: 378 included = (loc.parent / val).read_text("utf8", "replace") 379 except OSError as e: 380 warnings.warn(f"Cannot include {val!r}: {e}") 381 included = "\n" 382 included = _rst_admonitions(included, loc.parent / val) 383 return indent(included, ind) 384 if type == "math": 385 return f"{ind}$${val}{contents}$$\n" 386 if type in ("note", "warning", "danger"): 387 if val: 388 heading = f"{ind}###### {val}\n" 389 else: 390 heading = "" 391 return ( 392 f'{ind}<div class="pdoc-alert pdoc-alert-{type}" markdown="1">\n' 393 f"{heading}" 394 f"{indent(contents, ind)}\n" 395 f"{ind}</div>\n" 396 ) 397 if type == "code-block": 398 return f"{ind}```{val}\n{contents}\n```\n" 399 if type == "versionadded": 400 text = f"New in version {val}" 401 elif type == "versionchanged": 402 text = f"Changed in version {val}" 403 elif type == "deprecated": 404 text = f"Deprecated since version {val}" 405 else: 406 text = f"{type} {val}".strip() 407 408 if contents: 409 text = f"{ind}*{text}:*\n{indent(contents, ind)}\n\n" 410 else: 411 text = f"{ind}*{text}.*\n" 412 413 return text 414 415 admonition = "note|warning|danger|versionadded|versionchanged|deprecated|seealso|math|include|code-block" 416 return re.sub( 417 rf""" 418 ^(?P<indent>[ ]*)\.\.[ ]+(?P<type>{admonition})::(?P<val>.*) 419 (?P<contents>( 420 \n # empty lines 421 | # or 422 (?P=indent)[ ]+.+ # lines with indentation 423 )*)$ 424 """, 425 _rst_admonition, 426 contents, 427 flags=re.MULTILINE | re.VERBOSE, 428 ) 429 430 431def _rst_fields(contents: str) -> str: 432 """ 433 Convert reStructuredText fields to Markdown. 
434 <https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#rst-field-lists> 435 """ 436 437 _has_parameter_section = False 438 _has_raises_section = False 439 440 def _rst_field(m: re.Match[str]) -> str: 441 type = m["type"] 442 body = m["body"] 443 444 if m["name"]: 445 name = f"**{m['name'].strip()}**: " 446 else: 447 name = "" 448 449 if type == "param": 450 nonlocal _has_parameter_section 451 text = f" - {name}{body}" 452 if not _has_parameter_section: 453 _has_parameter_section = True 454 text = "\n###### Parameters\n" + text 455 return text 456 elif type == "type": 457 return "" # we expect users to use modern type annotations. 458 elif type == "return": 459 body = indent(body, "> ", lambda line: True) 460 return f"\n###### Returns\n{body}" 461 elif type == "rtype": 462 return "" # we expect users to use modern type annotations. 463 elif type == "raises": 464 nonlocal _has_raises_section 465 text = f" - {name}{body}" 466 if not _has_raises_section: 467 _has_raises_section = True 468 text = "\n###### Raises\n" + text 469 return text 470 else: # pragma: no cover 471 raise AssertionError("unreachable") 472 473 field = "param|type|return|rtype|raises" 474 return re.sub( 475 rf""" 476 ^:(?P<type>{field})(?:[ ]+(?P<name>.+))?: 477 (?P<body>.*( 478 (?:\n[ ]*)* # maybe some empty lines followed by 479 [ ]+.+ # lines with indentation 480 )*(?:\n|$)) 481 """, 482 _rst_field, 483 contents, 484 flags=re.MULTILINE | re.VERBOSE, 485 )
@cache
def convert(docstring: str, docformat: str, source_file: Path | None) -> str:
    """
    Render `docstring`, written in the flavor named by `docformat`, as Markdown.

    The flavor name is matched case-insensitively by substring. reStructuredText conversion
    runs for the "google", "numpy" and "restructuredtext" flavors; Google and NumPy section
    conversion are applied on top of it for their respective flavors. When `source_file` is
    known, local images are embedded unless the `PDOC_EMBED_IMAGES` environment variable
    is set to "0".
    """
    flavor = docformat.lower()
    is_google = "google" in flavor
    is_numpy = "numpy" in flavor

    if is_google or is_numpy or "restructuredtext" in flavor:
        docstring = rst(docstring, source_file)
    if is_google:
        docstring = google(docstring)
    if is_numpy:
        docstring = numpy(docstring)

    # Images can only be resolved relative to a known source file.
    if source_file is None:
        return docstring
    if os.environ.get("PDOC_EMBED_IMAGES") == "0":
        return docstring
    return embed_images(docstring, source_file)
Convert `docstring` from `docformat` to Markdown.
def embed_images(docstring: str, source_file: Path) -> str:
    """
    Inline local images referenced via Markdown image syntax as base64 `data:` URIs.

    Every `![alt](href)` occurrence in `docstring` is examined. `href` is resolved relative
    to the directory containing `source_file`. If that file can be read and its MIME type can
    be guessed from its name, the reference is replaced with
    `![alt](data:<mime>;base64,<data>)`. References that cannot be embedded (the file cannot
    be read, or its type is unknown) are left untouched.

    Args:
        docstring: Markdown text to scan for image references.
        source_file: File the docstring originates from; hrefs are resolved against its parent directory.

    Returns:
        The docstring with all embeddable image references replaced by data URIs.
    """
    # TODO: Could probably do more here, e.g. support rST or raw HTML replacements.

    def embed_local_image(m: re.Match[str]) -> str:
        image_path = source_file.parent / m["href"]
        try:
            image_data = image_path.read_bytes()
            image_mime = mimetypes.guess_type(image_path)[0]
        except Exception:
            # Embedding is deliberately best-effort: anything that is not a readable
            # local file stays as it was written.
            return m[0]
        if image_mime is None:
            # Unknown file type: "data:None;base64,..." would be a broken URI,
            # so keep the original reference instead.
            return m[0]
        data = base64.b64encode(image_data).decode()
        return f"![{m['alt']}](data:{image_mime};base64,{data})"

    return re.sub(
        r"!\[\s*(?P<alt>.*?)\s*]\(\s*(?P<href>.+?)\s*\)",
        embed_local_image,
        docstring,
    )
def google(docstring: str) -> str:
    """
    Rewrite every Google-style section found in `docstring` as Markdown.

    A section starts with a `Name:` line (a capital letter followed by letters and spaces) and
    extends over the empty or indented lines that follow it. Each section is rendered by
    `_google_section`; all other text is returned unchanged.
    """
    section_pattern = re.compile(
        r"""
        ^(?P<name>[A-Z][A-Z a-z]+):\n
        (?P<contents>(
            \n                  # empty lines
            |                   # or
            [ \t]+.+            # lines with indentation
        )+)$
        """,
        re.VERBOSE | re.MULTILINE,
    )
    return section_pattern.sub(_google_section, docstring)
Convert Google-style docstring sections into Markdown.
Section headers listed in the official Google docstring style guide.
Alternative section headers that are not listed in the official Google docstring style guide but that we recognize as sections containing lists nevertheless.
def numpy(docstring: str) -> str:
    """Convert NumPy-style docstring sections into Markdown.

    See <https://numpydoc.readthedocs.io/en/latest/format.html> for details.

    A section is a capitalized heading line followed by a line of at least three dashes.
    Parameter-like sections and "See Also" get dedicated list formatting; every other section
    is emitted dedented below a `######` heading. Text before the first section is kept as is.
    """
    sections = re.split(
        r"""
        ^([A-Z][A-Za-z ]+)\n  # a heading
        ---+\n+               # followed by a dashed line
        """,
        docstring,
        flags=re.VERBOSE | re.MULTILINE,
    )
    # With one capture group, re.split yields [preamble, heading, content, heading, content, ...].
    contents = sections[0]
    for heading, content in zip(sections[1::2], sections[2::2]):
        tail = ""
        if content.startswith(" "):
            # If the first line of section content is indented, we consider the section to be finished
            # on the first non-indented line. We take out the rest - the tail - here.
            # There may be no such line (the section runs to the end of the docstring),
            # in which case the split yields a single element and there is no tail.
            parts = re.split(r"\n(?![ \n])", content, maxsplit=1)
            if len(parts) == 2:
                content, tail = parts

        # The list helpers expect unindented input, so dedent for every kind of section.
        content = dedent(content)

        if heading in (
            "Parameters",
            "Returns",
            "Yields",
            "Receives",
            "Other Parameters",
            "Raises",
            "Warns",
            "Attributes",
        ):
            contents += f"###### {heading}\n{_numpy_parameters(content)}"
        elif heading == "See Also":
            contents += f"###### {heading}\n{_numpy_seealso(content)}"
        else:
            contents += f"###### {heading}\n{content}"
        contents += tail
    return contents
Convert NumPy-style docstring sections into Markdown.
See https://numpydoc.readthedocs.io/en/latest/format.html for details.
def rst(contents: str, source_file: Path | None) -> str:
    """
    Translate the most common reStructuredText constructs in `contents` into Markdown.

    This is not a full implementation of the spec. The passes run in a fixed order:
    admonitions (which may include files relative to `source_file`), hyperlinks,
    code references, inline math, footnotes and finally field lists.
    """

    def render_reference(match: re.Match[str]) -> str:
        # Callables get trailing parentheses so that they read as calls.
        role = match.group(2)
        target = match.group(3)
        call_suffix = "()" if role in ("meth", "func") else ""
        return f"`{target}{call_suffix}`"

    text = _rst_links(_rst_admonitions(contents, source_file))

    # Code References: :obj:`foo` -> `foo`
    text = re.sub(
        r"(:py)?:(mod|func|data|const|class|meth|attr|exc|obj):`([^`]+)`",
        render_reference,
        text,
    )

    # Math: :math:`foo` -> \\( foo \\)
    # $ is avoided because MathJax does not enable it by default.
    text = re.sub(r":math:`(.+?)`", r"\\\\( \1 \\\\)", text)

    return _rst_fields(_rst_footnotes(text))
Convert reStructuredText elements to Markdown. We support the most common elements, but we do not aim to mirror the full complexity of the spec here.