Edit on GitHub

pdoc.doc_ast

This module handles all interpretation of the Abstract Syntax Tree (AST) in pdoc.

Parsing the AST is done to extract docstrings, type annotations, and variable declarations from __init__.

  1"""
  2This module handles all interpretation of the *Abstract Syntax Tree (AST)* in pdoc.
  3
  4Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`.
  5"""
  6from __future__ import annotations
  7
  8import ast
  9import inspect
 10import types
 11import warnings
 12from collections.abc import Iterable
 13from collections.abc import Iterator
 14from dataclasses import dataclass
 15from itertools import tee
 16from itertools import zip_longest
 17from typing import Any
 18from typing import overload
 19from typing import TypeVar
 20
 21import pdoc
 22from ._compat import ast_unparse
 23from ._compat import cache
 24
 25
 26def get_source(obj: Any) -> str:
 27    """
 28    Returns the source code of the Python object `obj` as a str.
 29    This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`.
 30
 31    If this fails, an empty string is returned.
 32    """
 33    # Some objects may not be hashable, so we fall back to the non-cached version if that is the case.
 34    try:
 35        return _get_source(obj)
 36    except TypeError:
 37        return _get_source.__wrapped__(obj)
 38
 39
 40@cache
 41def _get_source(obj: Any) -> str:
 42    try:
 43        return inspect.getsource(obj)
 44    except Exception:
 45        return ""
 46
 47
 48@overload
 49def parse(obj: types.ModuleType) -> ast.Module:
 50    ...
 51
 52
 53@overload
 54def parse(obj: types.FunctionType) -> ast.FunctionDef | ast.AsyncFunctionDef:
 55    ...
 56
 57
 58@overload
 59def parse(obj: type) -> ast.ClassDef:
 60    ...
 61
 62
 63def parse(obj):
 64    """
 65    Parse a module, class or function and return the (unwrapped) AST node.
 66    If an object's source code cannot be found, this function returns an empty ast node stub
 67    which can still be walked.
 68    """
 69    src = get_source(obj)
 70    if isinstance(obj, types.ModuleType):
 71        return _parse_module(src)
 72    elif isinstance(obj, type):
 73        return _parse_class(src)
 74    else:
 75        return _parse_function(src)
 76
 77
 78@cache
 79def unparse(tree: ast.AST):
 80    """`ast.unparse`, but cached."""
 81    return ast_unparse(tree)
 82
 83
@dataclass
class AstInfo:
    """
    The information extracted from walking the syntax tree.

    Instances bundle the two mappings that `_walk_tree` builds while iterating over the nodes of a tree.
    """

    docstrings: dict[str, str]
    """A qualname -> docstring mapping."""
    annotations: dict[str, str]
    """A qualname -> annotation mapping.
    
    Annotations are not evaluated by this module and only returned as strings."""
 94
 95
 96def walk_tree(obj: types.ModuleType | type) -> AstInfo:
 97    """
 98    Walks the abstract syntax tree for `obj` and returns the extracted information.
 99    """
100    return _walk_tree(parse(obj))
101
102
def _docstring_text(node: ast.AST | None) -> str | None:
    """
    Return the text of `node` if it is a bare string-literal statement, and None otherwise.

    This is the shape a docstring has in the AST: an `ast.Expr` wrapping a string constant.
    """
    if not isinstance(node, ast.Expr):
        return None
    value = node.value
    if isinstance(value, ast.Constant):
        return value.value if isinstance(value.value, str) else None
    # Python <= 3.7 parses string literals as `ast.Str`. That class is deprecated since
    # Python 3.8 and removed in Python 3.14, so we match it by name instead of
    # touching the `ast.Str` attribute (which would warn or raise on newer versions).
    if type(value).__name__ == "Str":  # pragma: no cover
        return value.s
    return None


@cache
def _walk_tree(
    tree: ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef,
) -> AstInfo:
    """
    Walk the nodes of `tree` (with the body of `__init__` inlined, see `_nodes`)
    and collect variable annotations and docstrings.

    - Simple annotated assignments (`foo: int = 42`) record the unparsed annotation.
    - Plain single-target assignments (`foo = 42`) record an empty annotation unless one is already known.
    - For both kinds of assignment, a string literal in the directly following statement
      is recorded as the variable's docstring.
    - For (non-async) function definitions, a leading string literal in the body
      is recorded as the function's docstring.

    Returns an `AstInfo` with the collected docstrings and annotations.
    """
    docstrings = {}
    annotations = {}
    # Nodes are inspected pairwise so that `b` is the statement following `a`
    # (or None for the last node), which is where a variable docstring would be.
    for a, b in _pairwise_longest(_nodes(tree)):
        if isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple:
            name = a.target.id
            annotations[name] = unparse(a.annotation)
        elif (
            isinstance(a, ast.Assign)
            and len(a.targets) == 1
            and isinstance(a.targets[0], ast.Name)
        ):
            name = a.targets[0].id
            # Make sure that all assignments are picked up, even if there is
            # no annotation or docstring.
            annotations.setdefault(name, pdoc.doc_types.empty)
        elif isinstance(a, ast.FunctionDef) and a.body:
            doc = _docstring_text(a.body[0])
            if doc is not None:
                docstrings[a.name] = inspect.cleandoc(doc).strip()
            continue
        else:
            continue
        # `a` declared a variable: a string literal right after it is its docstring.
        doc = _docstring_text(b)
        if doc is not None:
            docstrings[name] = inspect.cleandoc(doc).strip()
    return AstInfo(
        docstrings,
        annotations,
    )
144
145
# Generic type variable used in the signatures of `sort_by_source` and `_pairwise_longest`.
T = TypeVar("T")
147
148
def sort_by_source(
    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
) -> tuple[dict[str, T], dict[str, T]]:
    """
    Move items from `unsorted` into `sorted`, following the order in which their names appear
    in the source code of `obj`. `__init__` is special-cased: if present, it is always moved first.

    Both dictionaries are modified in place. Items whose names do not show up in the source
    (for example because they were inherited from a superclass) stay in `unsorted` untouched.

    Returns a `(sorted, not found)` tuple.
    """
    tree = parse(obj)

    if "__init__" in unsorted:
        sorted["__init__"] = unsorted.pop("__init__")

    for node in _nodes(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = node.name
        elif isinstance(node, ast.Assign):
            # only plain `name = value` assignments with a single target count
            if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name):
                continue
            name = node.targets[0].id
        elif isinstance(node, ast.AnnAssign):
            # only simple `name: annotation` declarations count
            if not (isinstance(node.target, ast.Name) and node.simple):
                continue
            name = node.target.id
        else:
            continue

        if name in unsorted:
            sorted[name] = unsorted.pop(name)

    return sorted, unsorted
184
185
def type_checking_sections(mod: types.ModuleType) -> ast.Module:
    """
    Collect all statements of `mod` that are guarded by a top-level TYPE_CHECKING block.

    Both `if TYPE_CHECKING:` and `if <name>.TYPE_CHECKING:` are recognized. Only the body of
    the `if` statement is collected. The statements are returned as the body of a new `ast.Module`.
    """
    guarded = ast.Module(body=[], type_ignores=[])
    module_tree = _parse_module(get_source(mod))
    for stmt in module_tree.body:
        if not isinstance(stmt, ast.If):
            continue
        condition = stmt.test
        is_bare_name = (
            isinstance(condition, ast.Name) and condition.id == "TYPE_CHECKING"
        )
        # some folks do "import typing as t", the accuracy with just TYPE_CHECKING is good enough,
        # so the name in front of the dot is deliberately not compared against "typing".
        is_attribute = (
            isinstance(condition, ast.Attribute)
            and isinstance(condition.value, ast.Name)
            and condition.attr == "TYPE_CHECKING"
        )
        if is_bare_name or is_attribute:
            guarded.body.extend(stmt.body)
    return guarded
209
210
@cache
def _parse_module(source: str) -> ast.Module:
    """
    Turn the source code of a module into its `ast.Module`.

    An empty `source` results in an empty `ast.Module`.
    """
    module = _parse(source)
    assert isinstance(module, ast.Module)
    return module
221
222
@cache
def _parse_class(source: str) -> ast.ClassDef:
    """
    Parse the AST for the source code of a class and return the ast.ClassDef.

    Returns an empty ast.ClassDef stub (named `PdocStub`, with an empty body) if source is empty
    or could not be parsed.
    """
    tree = _parse(source)
    # The source of a single class consists of at most one top-level statement.
    assert len(tree.body) <= 1
    if tree.body:
        t = tree.body[0]
        assert isinstance(t, ast.ClassDef)
        return t
    # `name` is a required field of ast.ClassDef. Omitting required fields emits a
    # DeprecationWarning on Python 3.13+, so the stub is constructed with all fields set.
    return ast.ClassDef(
        name="PdocStub", bases=[], keywords=[], body=[], decorator_list=[]
    )
237
238
@cache
def _parse_function(source: str) -> ast.FunctionDef | ast.AsyncFunctionDef:
    """
    Parse the AST for the source code of a (async) function and return the matching AST node.

    Returns an empty ast.FunctionDef stub (named `pdoc_stub`, with an empty body) if source is empty,
    could not be parsed, or does not start with a function definition (e.g. a lambda).
    """
    tree = _parse(source)
    # The source of a single function consists of at most one top-level statement.
    assert len(tree.body) <= 1
    if tree.body:
        t = tree.body[0]
        if isinstance(t, (ast.FunctionDef, ast.AsyncFunctionDef)):
            return t
        # Otherwise we have a lambda function;
        # to simplify the API we fall through and return the ast.FunctionDef stub.
    # `name` and `args` are required fields of ast.FunctionDef. Omitting required fields
    # emits a DeprecationWarning on Python 3.13+, so the stub provides both.
    return ast.FunctionDef(
        name="pdoc_stub", args=ast.arguments(), body=[], decorator_list=[]
    )
257
258
def _parse(
    source: str,
) -> ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef:
    """
    Dedent `source` and parse it with `ast.parse`.

    If anything goes wrong, a warning containing the error and the offending source is issued
    and the result of parsing an empty string is returned instead.
    """
    try:
        tree = ast.parse(_dedent(source))
    except Exception as e:
        message = f"Error parsing source code: {e}\n===\n{source}\n==="
        warnings.warn(message)
        tree = ast.parse("")
    return tree
267
268
@cache
def _dedent(source: str) -> str:
    """
    Dedent the head of a function or class definition so that it can be parsed by `ast.parse`.
    This is an alternative to `textwrap.dedent`, which does not dedent if there are docstrings
    without indentation. For example, this is valid Python code but would not be dedented with `textwrap.dedent`:

    class Foo:
        def bar(self):
           '''
    this is a docstring
           '''

    Only the leading lines up to (and including) the line starting the definition are dedented;
    the remainder of `source` is returned unchanged. Sources that do not start with a space or tab
    are returned as-is.
    """
    if not source or source[0] not in (" ", "\t"):
        return source
    source = source.lstrip()
    # we may have decorators before our function definition, in which case we need to dedent a few more lines.
    # the following heuristic should be good enough to detect if we have reached the definition.
    # it's easy to produce examples where this fails, but this probably is not a problem in practice.
    if source.startswith(("async ", "def ", "class ")):
        return source
    # `partition` (unlike `split` with unpacking) also copes with a last line that has no
    # trailing newline: `newline` and `rest` are then simply empty.
    first_line, newline, rest = source.partition("\n")
    return first_line + newline + _dedent(rest)
293
294
@cache
def _nodes(tree: ast.Module | ast.ClassDef) -> list[ast.AST]:
    """
    Collect the nodes produced by `_nodes_iter` for `tree` into a list (cached).

    That is every node in the body of `tree`, with the body of `__init__` inlined, so that
    variables which are only declared in the constructor of a class are detected as well.
    """
    return [*_nodes_iter(tree)]
303
304
def _nodes_iter(tree: ast.Module | ast.ClassDef) -> Iterator[ast.AST]:
    """
    Yield every node in the body of `tree`. A function definition named `__init__`
    is additionally followed by the nodes that `_init_nodes` derives from it.
    """
    for node in tree.body:
        yield node
        is_constructor = isinstance(node, ast.FunctionDef) and node.name == "__init__"
        if is_constructor:
            yield from _init_nodes(node)
310
311
def _is_str_literal(value: ast.AST) -> bool:
    """Check whether the expression node `value` is a string literal."""
    if isinstance(value, ast.Constant):
        return isinstance(value.value, str)
    # Python <= 3.7 parses string literals as `ast.Str`. That class is deprecated since
    # Python 3.8 and removed in Python 3.14, so we match it by name instead of
    # touching the `ast.Str` attribute (which would warn or raise on newer versions).
    return type(value).__name__ == "Str"


def _init_nodes(tree: ast.FunctionDef) -> Iterator[ast.AST]:
    """
    Transform attribute assignments like "self.foo = 42" to name assignments like "foo = 42",
    keep all constant string expressions, and no-op everything else.
    This essentially allows us to inline __init__ when parsing a class definition.

    Exactly one node is yielded per statement in the body of `tree`:

    - `self.foo: int = 42` becomes the simple annotated assignment `foo: int = 42`,
    - `self.foo = 42` (single target only) becomes `foo = 42`,
    - bare string literals (potential docstrings) are yielded unchanged,
    - every other statement is replaced with `pass`, so that a string literal is only ever
      directly preceded by the statement it actually follows in the source.
    """
    for a in tree.body:
        if (
            isinstance(a, ast.AnnAssign)
            and isinstance(a.target, ast.Attribute)
            and isinstance(a.target.value, ast.Name)
            and a.target.value.id == "self"
        ):
            yield ast.AnnAssign(
                ast.Name(a.target.attr), a.annotation, a.value, simple=1
            )
        elif (
            isinstance(a, ast.Assign)
            and len(a.targets) == 1
            and isinstance(a.targets[0], ast.Attribute)
            and isinstance(a.targets[0].value, ast.Name)
            and a.targets[0].value.id == "self"
        ):
            yield ast.Assign(
                [ast.Name(a.targets[0].attr)],
                value=a.value,
                # not available on Python 3.7
                type_comment=getattr(a, "type_comment", None),
            )
        elif isinstance(a, ast.Expr) and _is_str_literal(a.value):
            yield a
        else:
            yield ast.Pass()
354
355
def _pairwise_longest(iterable: Iterable[T]) -> Iterable[tuple[T, T]]:
    """
    Pair every item with its successor; the last item is paired with None:

    s -> (s0,s1), (s1,s2), (s2, s3),  ..., (sN, None)
    """
    current, upcoming = tee(iterable, 2)
    # Advance the second iterator by one so that it always runs one item ahead.
    next(upcoming, None)
    return zip_longest(current, upcoming, fillvalue=None)
def get_source(obj: Any) -> str:
27def get_source(obj: Any) -> str:
28    """
29    Returns the source code of the Python object `obj` as a str.
30    This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`.
31
32    If this fails, an empty string is returned.
33    """
34    # Some objects may not be hashable, so we fall back to the non-cached version if that is the case.
35    try:
36        return _get_source(obj)
37    except TypeError:
38        return _get_source.__wrapped__(obj)

Returns the source code of the Python object obj as a str. This tries to first unwrap the method if it is wrapped and then calls inspect.getsource.

If this fails, an empty string is returned.

def parse(obj):
64def parse(obj):
65    """
66    Parse a module, class or function and return the (unwrapped) AST node.
67    If an object's source code cannot be found, this function returns an empty ast node stub
68    which can still be walked.
69    """
70    src = get_source(obj)
71    if isinstance(obj, types.ModuleType):
72        return _parse_module(src)
73    elif isinstance(obj, type):
74        return _parse_class(src)
75    else:
76        return _parse_function(src)

Parse a module, class or function and return the (unwrapped) AST node. If an object's source code cannot be found, this function returns an empty ast node stub which can still be walked.

@cache
def unparse(tree: ast.AST):
79@cache
80def unparse(tree: ast.AST):
81    """`ast.unparse`, but cached."""
82    return ast_unparse(tree)

ast.unparse, but cached.

@dataclass
class AstInfo:
85@dataclass
86class AstInfo:
87    """The information extracted from walking the syntax tree."""
88
89    docstrings: dict[str, str]
90    """A qualname -> docstring mapping."""
91    annotations: dict[str, str]
92    """A qualname -> annotation mapping.
93    
94    Annotations are not evaluated by this module and only returned as strings."""

The information extracted from walking the syntax tree.

AstInfo(docstrings: dict[str, str], annotations: dict[str, str])
docstrings: dict[str, str]

A qualname -> docstring mapping.

annotations: dict[str, str]

A qualname -> annotation mapping.

Annotations are not evaluated by this module and only returned as strings.

def walk_tree(obj: module | type) -> pdoc.doc_ast.AstInfo:
 97def walk_tree(obj: types.ModuleType | type) -> AstInfo:
 98    """
 99    Walks the abstract syntax tree for `obj` and returns the extracted information.
100    """
101    return _walk_tree(parse(obj))

Walks the abstract syntax tree for obj and returns the extracted information.

def sort_by_source( obj: module | type, sorted: dict[str, ~T], unsorted: dict[str, ~T]) -> tuple[dict[str, ~T], dict[str, ~T]]:
150def sort_by_source(
151    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
152) -> tuple[dict[str, T], dict[str, T]]:
153    """
154    Takes items from `unsorted` and inserts them into `sorted` in order of appearance in the source code of `obj`.
155    The only exception to this rule is `__init__`, which (if present) is always inserted first.
156
157    Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.
158
159    Returns a `(sorted, not found)` tuple.
160    """
161    tree = parse(obj)
162
163    if "__init__" in unsorted:
164        sorted["__init__"] = unsorted.pop("__init__")
165
166    for a in _nodes(tree):
167        if (
168            isinstance(a, ast.Assign)
169            and len(a.targets) == 1
170            and isinstance(a.targets[0], ast.Name)
171        ):
172            name = a.targets[0].id
173        elif (
174            isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple
175        ):
176            name = a.target.id
177        elif isinstance(a, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
178            name = a.name
179        else:
180            continue
181
182        if name in unsorted:
183            sorted[name] = unsorted.pop(name)
184    return sorted, unsorted

Takes items from unsorted and inserts them into sorted in order of appearance in the source code of obj. The only exception to this rule is __init__, which (if present) is always inserted first.

Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.

Returns a (sorted, not found) tuple.

def type_checking_sections(mod: module) -> ast.Module:
187def type_checking_sections(mod: types.ModuleType) -> ast.Module:
188    """
189    Walks the abstract syntax tree for `mod` and returns all statements guarded by TYPE_CHECKING blocks.
190    """
191    ret = ast.Module(body=[], type_ignores=[])
192    tree = _parse_module(get_source(mod))
193    for node in tree.body:
194        if (
195            isinstance(node, ast.If)
196            and isinstance(node.test, ast.Name)
197            and node.test.id == "TYPE_CHECKING"
198        ):
199            ret.body.extend(node.body)
200        if (
201            isinstance(node, ast.If)
202            and isinstance(node.test, ast.Attribute)
203            and isinstance(node.test.value, ast.Name)
204            # some folks do "import typing as t", the accuracy with just TYPE_CHECKING is good enough.
205            # and node.test.value.id == "typing"
206            and node.test.attr == "TYPE_CHECKING"
207        ):
208            ret.body.extend(node.body)
209    return ret

Walks the abstract syntax tree for mod and returns all statements guarded by TYPE_CHECKING blocks.