Edit on GitHub

pdoc.docstrings

This module handles the conversion of docstring flavors to Markdown.

The conversion from docstring flavors to Markdown is mostly done with regular expressions. This is not particularly beautiful, but good enough for our purposes. The alternative would be to depend on https://github.com/rr-/docstring_parser or a similar project, but that introduces more complexity than we are comfortable with.

If you miss a particular feature for your favorite flavor, contributions are welcome. That being said, please keep the complexity low and make sure that changes are accompanied by matching snapshot tests in test/testdata/.

  1"""
  2This module handles the conversion of docstring flavors to Markdown.
  3
  4The conversion from docstring flavors to Markdown is mostly done with regular expressions.
  5This is not particularly beautiful, but good enough for our purposes.
  6The alternative would be to depend on <https://github.com/rr-/docstring_parser> or a similar project,
  7but that introduces more complexity than we are comfortable with.
  8
  9If you miss a particular feature for your favorite flavor, contributions are welcome.
 10That being said, please keep the complexity low and make sure that changes are
 11accompanied by matching snapshot tests in `test/testdata/`.
 12"""
 13from __future__ import annotations
 14
 15import inspect
 16import re
 17import warnings
 18from pathlib import Path
 19from textwrap import dedent, indent
 20
 21from ._compat import cache
 22
 23
 24@cache
 25def convert(docstring: str, docformat: str, source_file: Path | None) -> str:
 26    """
 27    Convert `docstring` from `docformat` to Markdown.
 28    """
 29    docformat = docformat.lower()
 30
 31    if any(x in docformat for x in ["google", "numpy", "restructuredtext"]):
 32        docstring = rst(docstring, source_file)
 33
 34    if "google" in docformat:
 35        docstring = google(docstring)
 36
 37    if "numpy" in docformat:
 38        docstring = numpy(docstring)
 39
 40    return docstring
 41
 42
 43def google(docstring: str) -> str:
 44    """Convert Google-style docstring sections into Markdown."""
 45    return re.sub(
 46        r"""
 47        ^(?P<name>[A-Z][A-Z a-z]+):\n
 48        (?P<contents>(
 49            \n        # empty lines
 50            |         # or
 51            [ \t]+.+  # lines with indentation
 52        )+)$
 53        """,
 54        _google_section,
 55        docstring,
 56        flags=re.VERBOSE | re.MULTILINE,
 57    )
 58
 59
 60GOOGLE_LIST_SECTIONS = ["Args", "Raises", "Attributes"]
 61
 62
 63def _google_section(m: re.Match[str]) -> str:
 64    name = m.group("name")
 65    contents = dedent(m.group("contents"))
 66    if name in GOOGLE_LIST_SECTIONS:
 67        items = _indented_list(contents)
 68        contents = ""
 69        for item in items:
 70            try:
 71                # first ":" on the first line
 72                _, attr, desc = re.split(r"^(.+?:)", item, maxsplit=1)
 73            except ValueError:
 74                contents += " - " + indent(item, "   ")[3:]
 75            else:
 76                contents += f" - **{attr}** " + indent(desc, "   ")[3:]
 77            contents += "\n"
 78    else:
 79        contents = indent(contents, "> ", lambda line: True)
 80
 81    return f"\n###### {name}\n{contents}\n"
 82
 83
 84def _indented_list(contents: str) -> list[str]:
 85    """
 86    Convert a list string into individual (dedented) elements. For example,
 87
 88    foo:
 89        desc
 90    bar: int
 91        more desc
 92    baz:
 93        desc
 94            indented
 95
 96    returns [
 97        "foo:\ndesc",
 98        "bar: int\nmore desc",
 99        "baz:\ndesc\n    indented",
100    ]
101    """
102    # we expect this to be through cleandoc() already.
103    assert not contents.startswith(" ")
104    assert not contents.startswith("\n")
105    assert "\t" not in contents
106
107    ret: list[str] = []
108    for line in contents.splitlines(keepends=True):
109        empty = not line.strip()
110        indented = line.startswith(" ")
111        if not (empty or indented):
112            # new section
113            ret.append(line)
114        else:
115            # append to current section
116            ret[-1] += line
117
118    return [inspect.cleandoc(x) for x in ret]
119
120
def numpy(docstring: str) -> str:
    """Convert NumPy-style docstring sections into Markdown.

    A section is a heading line (an uppercase letter followed by letters or
    spaces) that is underlined with at least three dashes. Text before the
    first heading is kept as is. Parameter-like sections and "See Also" are
    rendered by `_numpy_parameters` and `_numpy_seealso`; any other section
    keeps its (dedented) text below a level-six heading.

    See <https://numpydoc.readthedocs.io/en/latest/format.html> for details.
    """
    sections = re.split(
        r"""
        ^([A-Z][A-Za-z ]+)\n  # a heading
        ---+\n+              # followed by a dashed line
        """,
        docstring,
        flags=re.VERBOSE | re.MULTILINE,
    )
    # re.split with one capturing group yields
    # [preamble, heading1, content1, heading2, content2, ...].
    contents = sections[0]
    for heading, content in zip(sections[1::2], sections[2::2]):
        tail = ""
        if content.startswith(" "):
            # If the first line of section content is indented, we consider the section to be
            # finished on the first non-indented line. We take out the rest - the tail - here.
            # An indented section that runs to the end of the docstring has no such line, so
            # the split may legitimately produce a single element.
            parts = re.split(r"\n(?![ \n])", content, maxsplit=1)
            content = parts[0]
            if len(parts) == 2:
                tail = parts[1]

        # The list helpers require text that does not start with a space, so indented
        # section bodies are dedented before they are handed over.
        content = dedent(content)

        if heading in (
            "Parameters",
            "Returns",
            "Yields",
            "Receives",
            "Other Parameters",
            "Raises",
            "Warns",
            "Attributes",
        ):
            contents += f"###### {heading}\n{_numpy_parameters(content)}"
        elif heading == "See Also":
            contents += f"###### {heading}\n{_numpy_seealso(content)}"
        else:
            contents += f"###### {heading}\n{content}"
        contents += tail
    return contents
160
161
def _numpy_seealso(content: str) -> str:
    """
    Convert a NumPy-style "See Also" section into Markdown.

    Each entry produced by `_indented_list` becomes one output line that ends
    in a Markdown hard line break (two spaces and a newline). The text before
    the first colon is split on spaces and each non-empty piece is wrapped in
    backticks; the text after the colon, if any, is kept as the description.
    """
    rendered: list[str] = []
    for entry in _indented_list(content):
        names, colon, description = entry.partition(":")
        suffix = f": {description}" if colon else ""

        stripped = (piece.strip() for piece in names.split(" "))
        quoted = ", ".join(f"`{piece}`" for piece in stripped if piece)
        rendered.append(f"{quoted}{suffix}  \n")
    return "".join(rendered)
176
177
def _numpy_parameters(content: str) -> str:
    """
    Convert a NumPy-style parameter section into a Markdown bullet list.

    Entries whose first line has the form "name : type" are rendered as
    " - **name** (type):" followed by the indented description. Entries
    without a colon use their first line as the name and the remainder as an
    inline description. The list is terminated by an extra newline.
    """
    typed_entry = re.compile(r"^(.+):(.+)([\s\S]*)")

    bullets: list[str] = []
    for entry in _indented_list(content):
        hit = typed_entry.match(entry)
        if hit:
            name, annotation, description = (part.strip() for part in hit.groups())
            bullets.append(
                f" - **{name}** ({annotation}):\n" f"{indent(description, '   ')}\n"
            )
            continue

        # No "name : type" line: the first line is the name, the rest the description.
        first_line, _, remainder = entry.partition("\n")
        name = first_line.strip()
        description = remainder.strip()
        if description:
            bullets.append(f" - **{name}**: {description}\n")
        else:
            bullets.append(f" - **{name}**\n")

    return "".join(bullets) + "\n"
201
202
def rst(contents: str, source_file: Path | None) -> str:
    """
    Convert reStructuredText elements to Markdown.

    The passes run in a fixed order: admonitions, hyperlinks, code-reference
    roles, inline math, footnotes and finally field lists. Only the most
    common elements are handled; mirroring the full reST spec is not a goal.

    `source_file` is forwarded to the admonition pass.
    """
    text = _rst_links(_rst_admonitions(contents, source_file))

    # Code References: :obj:`foo` -> `foo`
    role_prefix = r"(:py)?:(mod|func|data|const|class|meth|attr|exc|obj):"
    text = re.sub(role_prefix, "", text)

    # Math: :math:`foo` -> \\( foo \\)
    # We don't use $ as that's not enabled by MathJax by default.
    text = re.sub(r":math:`(.+?)`", r"\\\\( \1 \\\\)", text)

    return _rst_fields(_rst_footnotes(text))
225
226
def _rst_footnotes(contents: str) -> str:
    """
    Convert reStructuredText footnotes into Markdown footnotes.

    Definitions (".. [id] text") are rewritten to "[^id]: text" first, then
    references ("[id]_") to "[^id]". A reference whose id has no matching
    definition is left unchanged. Anonymous ids ("#" or "*") are numbered
    "fn-1", "fn-2", ... in order of appearance, separately for definitions
    and for references.
    """
    defined: set[str] = set()
    auto_counter = 0

    def resolve(raw_id: str) -> str:
        """Map a raw footnote label to the id used in the Markdown output."""
        nonlocal auto_counter
        if raw_id in ("*", "#"):
            auto_counter += 1
            raw_id = f"fn-{auto_counter}"
        return raw_id.lstrip("#*")

    def render_definition(m: re.Match[str]) -> str:
        label = resolve(m.group("id"))
        defined.add(label)
        body = indent(m.group("content"), "   ").lstrip()
        return f"{m.group('indent')}[^{label}]: {body}"

    def render_reference(m: re.Match[str]) -> str:
        label = resolve(m.group("id"))
        return f"[^{label}]" if label in defined else m.group(0)

    definition_re = re.compile(
        r"""
            ^(?P<indent>[ ]*)\.\.[ ]+\[(?P<id>\d+|[#*]\w*)](?P<content>.*
            (
                \n                 # empty lines
                |                  # or
                (?P=indent)[ ]+.+  # lines with indentation
            )*)$
            """,
        flags=re.MULTILINE | re.VERBOSE,
    )
    reference_re = re.compile(r"\[(?P<id>\d+|[#*]\w*)]_")

    # First pass: register and rewrite the definitions.
    contents = definition_re.sub(render_definition, contents)

    # Second pass: restart the numbering so anonymous references line up
    # with the anonymous definitions.
    auto_counter = 0
    return reference_re.sub(render_reference, contents)
274
275
def _rst_links(contents: str) -> str:
    """
    Convert reStructuredText hyperlinks into Markdown links.

    Three constructs are handled, in this order:

    1. Embedded URIs, "`text <url>`_", become "[text](url)".
    2. External hyperlink targets, ".. _name: http...", are removed from the
       text and remembered under their lower-cased, whitespace-free name.
    3. References such as "`name`_" are replaced by "[name](url)" if a
       matching target was registered, and are left untouched otherwise.
    """
    links: dict[str, str] = {}

    def register_link(m: re.Match[str]) -> str:
        # Normalize the name so lookups ignore case and whitespace.
        refid = re.sub(r"\s", "", m.group("id").lower())
        links[refid] = m.group("url")
        return ""  # the target definition itself is not rendered

    def replace_link(m: re.Match[str]) -> str:
        text = m.group("id")
        refid = re.sub(r"[\s`]", "", text.lower())
        try:
            return f"[{text.strip('`')}]({links[refid]})"
        except KeyError:
            # Not a known target: keep the original text.
            return m.group(0)

    # Embedded URIs
    contents = re.sub(
        r"`(?P<text>[^`]+)<(?P<url>.+?)>`_", r"[\g<text>](\g<url>)", contents
    )
    # External Hyperlink Targets
    # The two leading dots are escaped: an unescaped ".." matches any two
    # characters, which would treat lines like "ab _foo: http://..." as a
    # target definition and delete them.
    contents = re.sub(
        r"^\s*\.\.\s+_(?P<id>[^\n:]+):\s*(?P<url>http\S+)",
        register_link,
        contents,
        flags=re.MULTILINE,
    )
    # NOTE(review): the first alternative below matches a single character only,
    # so an unquoted multi-character reference ("name_") is not resolved.
    # Left unchanged here - confirm whether that is intended.
    contents = re.sub(r"(?P<id>[A-Za-z0-9_\-.:+]|`[^`]+`)_", replace_link, contents)
    return contents
306
307
def _rst_admonitions(contents: str, source_file: Path | None) -> str:
    """
    Convert reStructuredText admonitions - a bit tricky because they may already be indented themselves.
    <https://www.sphinx-doc.org/en/master/usage/restructuredtext/directives.html>

    Handled directives are note, warning, danger, versionadded,
    versionchanged, deprecated, seealso, math and include. The indentation
    in front of a directive is re-applied to whatever replaces it.

    `source_file` is the file the text came from; "include" paths are
    resolved relative to its parent directory (or relative to "." if it is
    None). A file that cannot be read triggers a warning and is replaced by
    an empty line.
    """
    version_captions = {
        "versionadded": "New in version",
        "versionchanged": "Changed in version",
        "deprecated": "Deprecated since version",
    }

    def render(m: re.Match[str]) -> str:
        pad = m.group("indent")
        kind = m.group("type")
        arg = m.group("val").strip()
        body = dedent(m.group("contents")).strip()

        if kind == "include":
            base = source_file or Path(".")
            try:
                included = (base.parent / arg).read_text("utf8", "replace")
            except OSError as e:
                warnings.warn(f"Cannot include {arg!r}: {e}")
                included = "\n"
            # Included text may contain admonitions (and includes) of its own.
            included = _rst_admonitions(included, base.parent / arg)
            return indent(included, pad)

        if kind == "math":
            return f"{pad}$${arg}{body}$$\n"

        if kind in ("note", "warning", "danger"):
            title = f"{pad}###### {arg}\n" if arg else ""
            return (
                f'{pad}<div class="pdoc-alert pdoc-alert-{kind}" markdown="1">\n'
                f"{title}"
                f"{indent(body, pad)}\n"
                f"{pad}</div>\n"
            )

        # Everything else is rendered as an emphasized caption line.
        if kind in version_captions:
            caption = f"{version_captions[kind]} {arg}"
        else:
            caption = f"{kind} {arg}".strip()

        if not body:
            return f"{pad}*{caption}.*\n"
        return f"{pad}*{caption}:*\n{indent(body, pad)}\n\n"

    admonition = "note|warning|danger|versionadded|versionchanged|deprecated|seealso|math|include"
    directive_re = re.compile(
        rf"""
            ^(?P<indent>[ ]*)\.\.[ ]+(?P<type>{admonition})::(?P<val>.*)
            (?P<contents>(
                \n                 # empty lines
                |                  # or
                (?P=indent)[ ]+.+  # lines with indentation
            )*)$
        """,
        flags=re.MULTILINE | re.VERBOSE,
    )
    return directive_re.sub(render, contents)
372
373
def _rst_fields(contents: str) -> str:
    """
    Convert reStructuredText fields to Markdown.
    <https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html#rst-field-lists>

    ":param:" and ":raises:" fields become bullet points; the first field of
    each kind is preceded by a "Parameters" or "Raises" heading. ":return:"
    becomes a block quote below a "Returns" heading. ":type:" and ":rtype:"
    fields are dropped.
    """
    # Headings ("Parameters", "Raises") that have already been emitted.
    emitted_headings: set[str] = set()

    def quote_every_line(line: str) -> bool:
        return True

    def render(m: re.Match[str]) -> str:
        kind = m["type"]
        body = m["body"]
        label = f"**{m['name'].strip()}**: " if m["name"] else ""

        if kind in ("type", "rtype"):
            return ""  # we expect users to use modern type annotations.

        if kind == "return":
            quoted = indent(body, "> ", quote_every_line)
            return f"\n###### Returns\n{quoted}"

        if kind == "param":
            heading = "Parameters"
        elif kind == "raises":
            heading = "Raises"
        else:  # pragma: no cover
            raise AssertionError("unreachable")

        bullet = f" - {label}{body}"
        if heading not in emitted_headings:
            emitted_headings.add(heading)
            bullet = f"\n###### {heading}\n" + bullet
        return bullet

    field = "param|type|return|rtype|raises"
    field_re = re.compile(
        rf"""
            ^:(?P<type>{field})(?:[ ]+(?P<name>.+))?:
            (?P<body>.*(
                (?:\n[ ]*)*  # maybe some empty lines followed by
                [ ]+.+       # lines with indentation
            )*(?:\n|$))
        """,
        flags=re.MULTILINE | re.VERBOSE,
    )
    return field_re.sub(render, contents)
@cache
def convert(docstring: str, docformat: str, source_file: pathlib.Path | None) -> str:
@cache
def convert(docstring: str, docformat: str, source_file: Path | None) -> str:
    """
    Render `docstring`, written in the `docformat` flavor, as Markdown.

    `docformat` is lower-cased and then searched for the substrings
    "google", "numpy" and "restructuredtext". Any of the three enables the
    reStructuredText pass; "google" and "numpy" additionally enable their own
    section conversion afterwards. A format containing none of them leaves
    the docstring untouched.

    `source_file` is forwarded to `rst`.
    """
    flavor = docformat.lower()

    # Google and NumPy docstrings may embed reST markup, so all three flavors
    # go through the reST pass before any flavor-specific handling.
    wants_rst = (
        "google" in flavor or "numpy" in flavor or "restructuredtext" in flavor
    )
    markdown = rst(docstring, source_file) if wants_rst else docstring

    if "google" in flavor:
        markdown = google(markdown)
    if "numpy" in flavor:
        markdown = numpy(markdown)

    return markdown

Convert docstring from docformat to Markdown.

def google(docstring: str) -> str:
def google(docstring: str) -> str:
    """
    Convert Google-style docstring sections into Markdown.

    A section is a heading line (an uppercase letter followed by letters or
    spaces, terminated by a colon) followed by one or more lines that are
    either empty or indented. Every such section is rewritten by
    `_google_section`; all other text is returned unchanged.
    """
    section_re = re.compile(
        r"""
        ^(?P<name>[A-Z][A-Z a-z]+):\n
        (?P<contents>(
            \n        # empty lines
            |         # or
            [ \t]+.+  # lines with indentation
        )+)$
        """,
        flags=re.VERBOSE | re.MULTILINE,
    )
    return section_re.sub(_google_section, docstring)

Convert Google-style docstring sections into Markdown.

def numpy(docstring: str) -> str:
def numpy(docstring: str) -> str:
    """Convert NumPy-style docstring sections into Markdown.

    A section is a heading line (an uppercase letter followed by letters or
    spaces) that is underlined with at least three dashes. Text before the
    first heading is kept as is. Parameter-like sections and "See Also" are
    rendered by `_numpy_parameters` and `_numpy_seealso`; any other section
    keeps its (dedented) text below a level-six heading.

    See <https://numpydoc.readthedocs.io/en/latest/format.html> for details.
    """
    sections = re.split(
        r"""
        ^([A-Z][A-Za-z ]+)\n  # a heading
        ---+\n+              # followed by a dashed line
        """,
        docstring,
        flags=re.VERBOSE | re.MULTILINE,
    )
    # re.split with one capturing group yields
    # [preamble, heading1, content1, heading2, content2, ...].
    contents = sections[0]
    for heading, content in zip(sections[1::2], sections[2::2]):
        tail = ""
        if content.startswith(" "):
            # If the first line of section content is indented, we consider the section to be
            # finished on the first non-indented line. We take out the rest - the tail - here.
            # An indented section that runs to the end of the docstring has no such line, so
            # the split may legitimately produce a single element.
            parts = re.split(r"\n(?![ \n])", content, maxsplit=1)
            content = parts[0]
            if len(parts) == 2:
                tail = parts[1]

        # The list helpers require text that does not start with a space, so indented
        # section bodies are dedented before they are handed over.
        content = dedent(content)

        if heading in (
            "Parameters",
            "Returns",
            "Yields",
            "Receives",
            "Other Parameters",
            "Raises",
            "Warns",
            "Attributes",
        ):
            contents += f"###### {heading}\n{_numpy_parameters(content)}"
        elif heading == "See Also":
            contents += f"###### {heading}\n{_numpy_seealso(content)}"
        else:
            contents += f"###### {heading}\n{content}"
        contents += tail
    return contents

Convert NumPy-style docstring sections into Markdown.

See https://numpydoc.readthedocs.io/en/latest/format.html for details.

def rst(contents: str, source_file: pathlib.Path | None) -> str:
def rst(contents: str, source_file: Path | None) -> str:
    """
    Convert reStructuredText elements to Markdown.

    The passes run in a fixed order: admonitions, hyperlinks, code-reference
    roles, inline math, footnotes and finally field lists. Only the most
    common elements are handled; mirroring the full reST spec is not a goal.

    `source_file` is forwarded to the admonition pass.
    """
    text = _rst_links(_rst_admonitions(contents, source_file))

    # Code References: :obj:`foo` -> `foo`
    role_prefix = r"(:py)?:(mod|func|data|const|class|meth|attr|exc|obj):"
    text = re.sub(role_prefix, "", text)

    # Math: :math:`foo` -> \\( foo \\)
    # We don't use $ as that's not enabled by MathJax by default.
    text = re.sub(r":math:`(.+?)`", r"\\\\( \1 \\\\)", text)

    return _rst_fields(_rst_footnotes(text))

Convert reStructuredText elements to Markdown. We support the most common elements, but we do not aim to mirror the full complexity of the spec here.