pdoc.docstrings
This module handles the conversion of docstring flavors to Markdown.
The conversion from docstring flavors to Markdown is mostly done with regular expressions. This is not particularly beautiful, but good enough for our purposes. The alternative would be to depend on https://github.com/rr-/docstring_parser or a similar project, but that introduces more complexity than we are comfortable with.
If you miss a particular feature for your favorite flavor, contributions are welcome.
That being said, please keep the complexity low and make sure that changes are
accompanied by matching snapshot tests in `test/testdata/`.
1""" 2This module handles the conversion of docstring flavors to Markdown. 3 4The conversion from docstring flavors to Markdown is mostly done with regular expressions. 5This is not particularly beautiful, but good enough for our purposes. 6The alternative would be to depend on <https://github.com/rr-/docstring_parser> or a similar project, 7but that introduces more complexity than we are comfortable with. 8 9If you miss a particular feature for your favorite flavor, contributions are welcome. 10That being said, please keep the complexity low and make sure that changes are 11accompanied by matching snapshot tests in `test/testdata/`. 12""" 13from __future__ import annotations 14 15import base64 16import inspect 17import mimetypes 18import os 19import re 20import warnings 21from pathlib import Path 22from textwrap import dedent 23from textwrap import indent 24 25from ._compat import cache 26 27 28@cache 29def convert(docstring: str, docformat: str, source_file: Path | None) -> str: 30 """ 31 Convert `docstring` from `docformat` to Markdown. 
32 """ 33 docformat = docformat.lower() 34 35 if any(x in docformat for x in ["google", "numpy", "restructuredtext"]): 36 docstring = rst(docstring, source_file) 37 38 if "google" in docformat: 39 docstring = google(docstring) 40 41 if "numpy" in docformat: 42 docstring = numpy(docstring) 43 44 if source_file is not None and os.environ.get("PDOC_EMBED_IMAGES") != "0": 45 docstring = embed_images(docstring, source_file) 46 47 return docstring 48 49 50def embed_images(docstring: str, source_file: Path) -> str: 51 def embed_local_image(m: re.Match) -> str: 52 image_path = source_file.parent / m["href"] 53 try: 54 image_data = image_path.read_bytes() 55 image_mime = mimetypes.guess_type(image_path)[0] 56 except Exception: 57 return m[0] 58 else: 59 data = base64.b64encode(image_data).decode() 60 return f"![{m['alt']}](data:{image_mime};base64,{data})" 61 62 return re.sub( 63 r"!\[\s*(?P<alt>.*?)\s*]\(\s*(?P<href>.+?)\s*\)", 64 embed_local_image, 65 docstring, 66 ) 67 # TODO: Could probably do more here, e.g. support rST or raw HTML replacements. 68 69 70def google(docstring: str) -> str: 71 """Convert Google-style docstring sections into Markdown.""" 72 return re.sub( 73 r""" 74 ^(?P<name>[A-Z][A-Z a-z]+):\n 75 (?P<contents>( 76 \n # empty lines 77 | # or 78 [ \t]+.+ # lines with indentation 79 )+)$ 80 """, 81 _google_section, 82 docstring, 83 flags=re.VERBOSE | re.MULTILINE, 84 ) 85 86 87GOOGLE_LIST_SECTIONS = ["Args", "Raises", "Attributes"] 88"""Section headers listed in the official Google docstring style guide.""" 89 90GOOGLE_LIST_SECTION_ALIASES = { 91 "Parameters": "Args", 92 "Params": "Args", 93 "Arguments": "Args", 94} 95""" 96Alternative section headers that are not listed in the official Google 97docstring style guide but that we recognize as sections containing lists 98nevertheless. 
99""" 100 101 102def _google_section(m: re.Match[str]) -> str: 103 name = m.group("name") 104 contents = dedent(m.group("contents")).lstrip() 105 106 if name in GOOGLE_LIST_SECTION_ALIASES: 107 name = GOOGLE_LIST_SECTION_ALIASES[name] 108 109 if name in GOOGLE_LIST_SECTIONS: 110 items = _indented_list(contents) 111 contents = "" 112 for item in items: 113 try: 114 # first ":" on the first line 115 _, attr, desc = re.split(r"^(.+?:)", item, maxsplit=1) 116 except ValueError: 117 contents += " - " + indent(item, " ")[3:] 118 else: 119 contents += f" - **{attr}** " + indent(desc, " ")[3:] 120 contents += "\n" 121 else: 122 contents = indent(contents, "> ", lambda line: True) 123 124 if name == "Args": 125 name = "Arguments" 126 127 return f"\n###### {name}:\n{contents}\n" 128 129 130def _indented_list(contents: str) -> list[str]: 131 """ 132 Convert a list string into individual (dedented) elements. For example, 133 134 foo: 135 desc 136 bar: int 137 more desc 138 baz: 139 desc 140 indented 141 142 returns [ 143 "foo:\ndesc", 144 "bar: int\nmore desc", 145 "baz:\ndesc\n indented", 146 ] 147 """ 148 # we expect this to be through cleandoc() already. 149 assert not contents.startswith(" "), contents 150 assert not contents.startswith("\n"), contents 151 152 ret: list[str] = [] 153 for line in contents.splitlines(keepends=True): 154 empty = not line.strip() 155 indented = line.startswith(" ") 156 if not (empty or indented): 157 # new section 158 ret.append(line) 159 else: 160 # append to current section 161 ret[-1] += line 162 163 return [inspect.cleandoc(x) for x in ret] 164 165 166def numpy(docstring: str) -> str: 167 """Convert NumPy-style docstring sections into Markdown. 168 169 See <https://numpydoc.readthedocs.io/en/latest/format.html> for details. 
170 """ 171 sections = re.split( 172 r""" 173 ^([A-Z][A-Za-z ]+)\n # a heading 174 ---+\n+ # followed by a dashed line 175 """, 176 docstring, 177 flags=re.VERBOSE | re.MULTILINE, 178 ) 179 contents = sections[0] 180 for heading, content in zip(sections[1::2], sections[2::2]): 181 if content.startswith(" "): 182 # If the first line of section content is indented, we consider the section to be finished 183 # on the first non-indented line. We take out the rest - the tail - here. 184 content, tail = re.split(r"\n(?![ \n])", content, maxsplit=1) 185 else: 186 tail = "" 187 188 if heading in ( 189 "Parameters", 190 "Returns", 191 "Yields", 192 "Receives", 193 "Other Parameters", 194 "Raises", 195 "Warns", 196 "Attributes", 197 ): 198 contents += f"###### {heading}\n{_numpy_parameters(content)}" 199 elif heading == "See Also": 200 contents += f"###### {heading}\n{_numpy_seealso(content)}" 201 else: 202 contents += f"###### {heading}\n{dedent(content)}" 203 contents += tail 204 return contents 205 206 207def _numpy_seealso(content: str) -> str: 208 """Convert a NumPy-style "See Also" section into Markdown""" 209 contents = "" 210 for item in _indented_list(content): 211 if ":" in item: 212 funcstr, desc = item.split(":", maxsplit=1) 213 desc = f": {desc}" 214 else: 215 funcstr, desc = item, "" 216 217 funclist = [f.strip() for f in funcstr.split(" ")] 218 funcs = ", ".join(f"`{f}`" for f in funclist if f) 219 contents += f"{funcs}{desc} \n" 220 return contents 221 222 223def _numpy_parameters(content: str) -> str: 224 """Convert a NumPy-style parameter section into Markdown""" 225 contents = "" 226 for item in _indented_list(content): 227 m = re.match(r"^(.+):(.+)([\s\S]*)", item) 228 if m: 229 contents += ( 230 f" - **{m.group(1).strip()}** ({m.group(2).strip()}):\n" 231 f"{indent(m.group(3).strip(), ' ')}\n" 232 ) 233 else: 234 if "\n" in item: 235 name, desc = item.split("\n", maxsplit=1) 236 name = name.strip() 237 desc = desc.strip() 238 else: 239 name, desc = 
item.strip(), "" 240 241 if desc: 242 contents += f" - **{name}**: {desc}\n" 243 else: 244 contents += f" - **{name}**\n" 245 return f"{contents}\n" 246 247 248def rst(contents: str, source_file: Path | None) -> str: 249 """ 250 Convert reStructuredText elements to Markdown. 251 We support the most common elements, but we do not aim to mirror the full complexity of the spec here. 252 """ 253 contents = _rst_admonitions(contents, source_file) 254 contents = _rst_links(contents) 255 256 def replace_reference(m): 257 _, kind, name = m.groups() 258 if kind in ("meth", "func"): 259 return f"`{name}()`" 260 else: 261 return f"`{name}`" 262 263 # Code References: :obj:`foo` -> `foo` 264 contents = re.sub( 265 r"(:py)?:(mod|func|data|const|class|meth|attr|exc|obj):`([^`]+)`", 266 replace_reference, 267 contents, 268 ) 269 270 # Math: :math:`foo` -> \\( foo \\) 271 # We don't use $ as that's not enabled by MathJax by default. 272 contents = re.sub(r":math:`(.+?)`", r"\\\\( \1 \\\\)", contents) 273 274 contents = _rst_footnotes(contents) 275 276 contents = _rst_fields(contents) 277 278 return contents 279 280 281def _rst_footnotes(contents: str) -> str: 282 """Convert reStructuredText footnotes""" 283 footnotes: set[str] = set() 284 autonum: int 285 286 def register_footnote(m: re.Match[str]) -> str: 287 nonlocal autonum 288 fn_id = m.group("id") 289 if fn_id in "*#": 290 fn_id = f"fn-{autonum}" 291 autonum += 1 292 fn_id = fn_id.lstrip("#*") 293 footnotes.add(fn_id) 294 content = indent(m.group("content"), " ").lstrip() 295 return f"{m.group('indent')}[^{fn_id}]: {content}" 296 297 # Register footnotes 298 autonum = 1 299 contents = re.sub( 300 r""" 301 ^(?P<indent>[ ]*)\.\.[ ]+\[(?P<id>\d+|[#*]\w*)](?P<content>.* 302 ( 303 \n # empty lines 304 | # or 305 (?P=indent)[ ]+.+ # lines with indentation 306 )*)$ 307 """, 308 register_footnote, 309 contents, 310 flags=re.MULTILINE | re.VERBOSE, 311 ) 312 313 def replace_references(m: re.Match[str]) -> str: 314 nonlocal autonum 315 
fn_id = m.group("id") 316 if fn_id in "*#": 317 fn_id = f"fn-{autonum}" 318 autonum += 1 319 fn_id = fn_id.lstrip("#*") 320 if fn_id in footnotes: 321 return f"[^{fn_id}]" 322 else: 323 return m.group(0) 324 325 autonum = 1 326 contents = re.sub(r"\[(?P<id>\d+|[#*]\w*)]_", replace_references, contents) 327 return contents 328 329 330def _rst_links(contents: str) -> str: 331 """Convert reStructuredText hyperlinks""" 332 links = {} 333 334 def register_link(m: re.Match[str]) -> str: 335 refid = re.sub(r"\s", "", m.group("id").lower()) 336 links[refid] = m.group("url") 337 return "" 338 339 def replace_link(m: re.Match[str]) -> str: 340 text = m.group("id") 341 refid = re.sub(r"[\s`]", "", text.lower()) 342 try: 343 return f"[{text.strip('`')}]({links[refid]})" 344 except KeyError: 345 return m.group(0) 346 347 # Embedded URIs 348 contents = re.sub( 349 r"`(?P<text>[^`]+)<(?P<url>.+?)>`_", r"[\g<text>](\g<url>)", contents 350 ) 351 # External Hyperlink Targets 352 contents = re.sub( 353 r"^\s*..\s+_(?P<id>[^\n:]+):\s*(?P<url>http\S+)", 354 register_link, 355 contents, 356 flags=re.MULTILINE, 357 ) 358 contents = re.sub(r"(?P<id>[A-Za-z0-9_\-.:+]|`[^`]+`)_", replace_link, contents) 359 return contents 360 361 362def _rst_admonitions(contents: str, source_file: Path | None) -> str: 363 """ 364 Convert reStructuredText admonitions - a bit tricky because they may already be indented themselves. 
365 <https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html> 366 """ 367 368 def _rst_admonition(m: re.Match[str]) -> str: 369 ind = m.group("indent") 370 type = m.group("type") 371 val = m.group("val").strip() 372 contents = dedent(m.group("contents")).strip() 373 374 if type == "include": 375 loc = source_file or Path(".") 376 try: 377 included = (loc.parent / val).read_text("utf8", "replace") 378 except OSError as e: 379 warnings.warn(f"Cannot include {val!r}: {e}") 380 included = "\n" 381 included = _rst_admonitions(included, loc.parent / val) 382 return indent(included, ind) 383 if type == "math": 384 return f"{ind}$${val}{contents}$$\n" 385 if type in ("note", "warning", "danger"): 386 if val: 387 heading = f"{ind}###### {val}\n" 388 else: 389 heading = "" 390 return ( 391 f'{ind}<div class="pdoc-alert pdoc-alert-{type}" markdown="1">\n' 392 f"{heading}" 393 f"{indent(contents, ind)}\n" 394 f"{ind}</div>\n" 395 ) 396 elif type == "versionadded": 397 text = f"New in version {val}" 398 elif type == "versionchanged": 399 text = f"Changed in version {val}" 400 elif type == "deprecated": 401 text = f"Deprecated since version {val}" 402 else: 403 text = f"{type} {val}".strip() 404 405 if contents: 406 text = f"{ind}*{text}:*\n{indent(contents, ind)}\n\n" 407 else: 408 text = f"{ind}*{text}.*\n" 409 410 return text 411 412 admonition = "note|warning|danger|versionadded|versionchanged|deprecated|seealso|math|include" 413 return re.sub( 414 rf""" 415 ^(?P<indent>[ ]*)\.\.[ ]+(?P<type>{admonition})::(?P<val>.*) 416 (?P<contents>( 417 \n # empty lines 418 | # or 419 (?P=indent)[ ]+.+ # lines with indentation 420 )*)$ 421 """, 422 _rst_admonition, 423 contents, 424 flags=re.MULTILINE | re.VERBOSE, 425 ) 426 427 428def _rst_fields(contents: str) -> str: 429 """ 430 Convert reStructuredText fields to Markdown. 
431 <https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#rst-field-lists> 432 """ 433 434 _has_parameter_section = False 435 _has_raises_section = False 436 437 def _rst_field(m: re.Match[str]) -> str: 438 type = m["type"] 439 body = m["body"] 440 441 if m["name"]: 442 name = f"**{m['name'].strip()}**: " 443 else: 444 name = "" 445 446 if type == "param": 447 nonlocal _has_parameter_section 448 text = f" - {name}{body}" 449 if not _has_parameter_section: 450 _has_parameter_section = True 451 text = "\n###### Parameters\n" + text 452 return text 453 elif type == "type": 454 return "" # we expect users to use modern type annotations. 455 elif type == "return": 456 body = indent(body, "> ", lambda line: True) 457 return f"\n###### Returns\n{body}" 458 elif type == "rtype": 459 return "" # we expect users to use modern type annotations. 460 elif type == "raises": 461 nonlocal _has_raises_section 462 text = f" - {name}{body}" 463 if not _has_raises_section: 464 _has_raises_section = True 465 text = "\n###### Raises\n" + text 466 return text 467 else: # pragma: no cover 468 raise AssertionError("unreachable") 469 470 field = "param|type|return|rtype|raises" 471 return re.sub( 472 rf""" 473 ^:(?P<type>{field})(?:[ ]+(?P<name>.+))?: 474 (?P<body>.*( 475 (?:\n[ ]*)* # maybe some empty lines followed by 476 [ ]+.+ # lines with indentation 477 )*(?:\n|$)) 478 """, 479 _rst_field, 480 contents, 481 flags=re.MULTILINE | re.VERBOSE, 482 )
@cache
def convert(docstring: str, docformat: str, source_file: Path | None) -> str:
    """
    Convert `docstring` from `docformat` to Markdown.

    The format name is compared case-insensitively and by substring.
    reStructuredText conversion runs first for every supported flavor, followed by
    the Google and NumPy section conversions, and finally local images are embedded
    unless `source_file` is `None` or `PDOC_EMBED_IMAGES` is set to `"0"`.
    """
    flavor = docformat.lower()
    markdown = docstring

    handles_rst = (
        "google" in flavor or "numpy" in flavor or "restructuredtext" in flavor
    )
    if handles_rst:
        markdown = rst(markdown, source_file)

    # Flavor-specific section handling, applied in a fixed order.
    for keyword, converter in (("google", google), ("numpy", numpy)):
        if keyword in flavor:
            markdown = converter(markdown)

    embedding_enabled = os.environ.get("PDOC_EMBED_IMAGES") != "0"
    if embedding_enabled and source_file is not None:
        markdown = embed_images(markdown, source_file)

    return markdown
Convert `docstring` from `docformat` to Markdown.
def embed_images(docstring: str, source_file: Path) -> str:
    """
    Replace Markdown image references to local files with base64 `data:` URIs.

    Image paths are resolved relative to the directory containing `source_file`.
    References that cannot be read from disk, or whose MIME type cannot be guessed,
    are left untouched.
    """

    def embed_local_image(m: re.Match) -> str:
        image_path = source_file.parent / m["href"]
        try:
            image_data = image_path.read_bytes()
            image_mime = mimetypes.guess_type(image_path)[0]
        except Exception:
            # Deliberately best-effort: anything that is not a readable local file stays as-is.
            return m[0]
        if image_mime is None:
            # Without a MIME type we would emit an invalid "data:None;..." URI.
            return m[0]
        data = base64.b64encode(image_data).decode()
        return f"![{m['alt']}](data:{image_mime};base64,{data})"

    # TODO: Could probably do more here, e.g. support rST or raw HTML replacements.
    return re.sub(
        r"!\[\s*(?P<alt>.*?)\s*]\(\s*(?P<href>.+?)\s*\)",
        embed_local_image,
        docstring,
    )
def google(docstring: str) -> str:
    """
    Convert Google-style docstring sections into Markdown.

    A section is a capitalized heading line ending in a colon, followed by at least
    one empty or indented line. Each match is handed to `_google_section`.
    """
    section_pattern = re.compile(
        r"""
        ^(?P<name>[A-Z][A-Z a-z]+):\n
        (?P<contents>(
            \n        # empty lines
            |         # or
            [ \t]+.+  # lines with indentation
        )+)$
        """,
        re.VERBOSE | re.MULTILINE,
    )
    return section_pattern.sub(_google_section, docstring)
Convert Google-style docstring sections into Markdown.
Section headers listed in the official Google docstring style guide.
Alternative section headers that are not listed in the official Google docstring style guide but that we recognize as sections containing lists nevertheless.
def numpy(docstring: str) -> str:
    """Convert NumPy-style docstring sections into Markdown.

    See <https://numpydoc.readthedocs.io/en/latest/format.html> for details.
    """
    sections = re.split(
        r"""
        ^([A-Z][A-Za-z ]+)\n  # a heading
        ---+\n+               # followed by a dashed line
        """,
        docstring,
        flags=re.VERBOSE | re.MULTILINE,
    )
    # The capture group makes re.split alternate between headings and section bodies
    # after the leading text.
    contents = sections[0]
    for heading, content in zip(sections[1::2], sections[2::2]):
        tail = ""
        if content.startswith(" "):
            # If the first line of section content is indented, we consider the section to be finished
            # on the first non-indented line. We take out the rest - the tail - here.
            # There may be no such line (the section runs to the end of the docstring),
            # in which case the split yields a single element and there is no tail.
            parts = re.split(r"\n(?![ \n])", content, maxsplit=1)
            if len(parts) == 2:
                content, tail = parts

        if heading in (
            "Parameters",
            "Returns",
            "Yields",
            "Receives",
            "Other Parameters",
            "Raises",
            "Warns",
            "Attributes",
        ):
            contents += f"###### {heading}\n{_numpy_parameters(content)}"
        elif heading == "See Also":
            contents += f"###### {heading}\n{_numpy_seealso(content)}"
        else:
            contents += f"###### {heading}\n{dedent(content)}"
        contents += tail
    return contents
Convert NumPy-style docstring sections into Markdown.
See https://numpydoc.readthedocs.io/en/latest/format.html for details.
def rst(contents: str, source_file: Path | None) -> str:
    """
    Convert reStructuredText elements to Markdown.

    Only the most common elements are handled; mirroring the full complexity of the
    spec is a non-goal. The steps run in a fixed order: admonitions, hyperlinks,
    code references, inline math, footnotes, and finally field lists.
    """

    def _as_inline_code(match):
        # Group 1 is the optional ":py" domain prefix and is not needed for the output.
        role, target = match.group(2), match.group(3)
        call_suffix = "()" if role in ("meth", "func") else ""
        return f"`{target}{call_suffix}`"

    markdown = _rst_links(_rst_admonitions(contents, source_file))

    # Code References: :obj:`foo` -> `foo`
    markdown = re.sub(
        r"(:py)?:(mod|func|data|const|class|meth|attr|exc|obj):`([^`]+)`",
        _as_inline_code,
        markdown,
    )

    # Math: :math:`foo` -> \\( foo \\)
    # We don't use $ as that's not enabled by MathJax by default.
    markdown = re.sub(r":math:`(.+?)`", r"\\\\( \1 \\\\)", markdown)

    return _rst_fields(_rst_footnotes(markdown))
Convert reStructuredText elements to Markdown. We support the most common elements, but we do not aim to mirror the full complexity of the spec here.