Edit on GitHub

pdoc.doc_ast

This module handles all interpretation of the Abstract Syntax Tree (AST) in pdoc.

The AST is parsed to extract docstrings and type annotations, as well as the variables declared in __init__.

  1"""
  2This module handles all interpretation of the *Abstract Syntax Tree (AST)* in pdoc.
  3
  4Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`.
  5"""
  6from __future__ import annotations
  7
  8import ast
  9import inspect
 10import types
 11import warnings
 12from collections.abc import Iterable, Iterator
 13from dataclasses import dataclass
 14from itertools import tee, zip_longest
 15from typing import Any, TypeVar, overload
 16
 17from ._compat import ast_unparse, cache
 18
 19
 20def get_source(obj: Any) -> str:
 21    """
 22    Returns the source code of the Python object `obj` as a str.
 23    This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`.
 24
 25    If this fails, an empty string is returned.
 26    """
 27    # Some objects may not be hashable, so we fall back to the non-cached version if that is the case.
 28    try:
 29        return _get_source(obj)
 30    except TypeError:
 31        return _get_source.__wrapped__(obj)
 32
 33
 34@cache
 35def _get_source(obj: Any) -> str:
 36    try:
 37        return inspect.getsource(obj)
 38    except Exception:
 39        return ""
 40
 41
 42@overload
 43def parse(obj: types.ModuleType) -> ast.Module:
 44    ...
 45
 46
 47@overload
 48def parse(obj: types.FunctionType) -> ast.FunctionDef | ast.AsyncFunctionDef:
 49    ...
 50
 51
 52@overload
 53def parse(obj: type) -> ast.ClassDef:
 54    ...
 55
 56
 57def parse(obj):
 58    """
 59    Parse a module, class or function and return the (unwrapped) AST node.
 60    If an object's source code cannot be found, this function returns an empty ast node stub
 61    which can still be walked.
 62    """
 63    src = get_source(obj)
 64    if isinstance(obj, types.ModuleType):
 65        return _parse_module(src)
 66    elif isinstance(obj, type):
 67        return _parse_class(src)
 68    else:
 69        return _parse_function(src)
 70
 71
 72@cache
 73def unparse(tree: ast.AST):
 74    """`ast.unparse`, but cached."""
 75    return ast_unparse(tree)
 76
 77
 78@dataclass
 79class AstInfo:
 80    """The information extracted from walking the syntax tree."""
 81
 82    docstrings: dict[str, str]
 83    """A qualname -> docstring mapping."""
 84    annotations: dict[str, str]
 85    """A qualname -> annotation mapping.
 86    
 87    Annotations are not evaluated by this module and only returned as strings."""
 88
 89
 90def walk_tree(obj: types.ModuleType | type) -> AstInfo:
 91    """
 92    Walks the abstract syntax tree for `obj` and returns the extracted information.
 93    """
 94    return _walk_tree(parse(obj))
 95
 96
 97@cache
 98def _walk_tree(
 99    tree: ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef,
100) -> AstInfo:
101    docstrings = {}
102    annotations = {}
103    for a, b in _pairwise_longest(_nodes(tree)):
104        if isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple:
105            name = a.target.id
106            annotations[name] = unparse(a.annotation)
107        elif (
108            isinstance(a, ast.Assign)
109            and len(a.targets) == 1
110            and isinstance(a.targets[0], ast.Name)
111        ):
112            name = a.targets[0].id
113        elif isinstance(a, ast.FunctionDef) and a.body:
114            first = a.body[0]
115            if isinstance(first, ast.Expr) and isinstance(first.value, ast.Str):
116                docstrings[a.name] = inspect.cleandoc(first.value.s).strip()
117            continue
118        else:
119            continue
120        if (
121            isinstance(b, ast.Expr)
122            and isinstance(b.value, ast.Constant)
123            and isinstance(b.value.value, str)
124        ):
125            docstrings[name] = inspect.cleandoc(b.value.value).strip()
126        elif isinstance(b, ast.Expr) and isinstance(
127            b.value, ast.Str
128        ):  # pragma: no cover
129            # Python <= 3.7
130            docstrings[name] = inspect.cleandoc(b.value.s).strip()
131    return AstInfo(
132        docstrings,
133        annotations,
134    )
135
136
T = TypeVar("T")


def sort_by_source(
    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
) -> tuple[dict[str, T], dict[str, T]]:
    """
    Moves items from `unsorted` into `sorted`, following the order in which their names appear in the source code
    of `obj`. `__init__` is the one exception: if present, it is always moved first.

    Items whose name is not found in the source (for example because they've been inherited from a superclass)
    stay in `unsorted` untouched.

    Returns a `(sorted, not found)` tuple. Both input dictionaries are modified in place.
    """
    tree = parse(obj)

    if "__init__" in unsorted:
        sorted["__init__"] = unsorted.pop("__init__")

    for node in _nodes(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = node.name
        elif isinstance(node, ast.AnnAssign):
            # only simple annotated assignments to a plain name count
            if not (isinstance(node.target, ast.Name) and node.simple):
                continue
            name = node.target.id
        elif isinstance(node, ast.Assign):
            # only assignments with exactly one plain-name target count
            if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name):
                continue
            name = node.targets[0].id
        else:
            continue

        if name in unsorted:
            sorted[name] = unsorted.pop(name)
    return sorted, unsorted
175
176
def type_checking_sections(mod: types.ModuleType) -> ast.Module:
    """
    Collects the bodies of all top-level `if TYPE_CHECKING:` / `if <name>.TYPE_CHECKING:` blocks of `mod`
    into a new `ast.Module` and returns it.
    """
    guarded = ast.Module(body=[], type_ignores=[])
    for stmt in _parse_module(get_source(mod)).body:
        if not isinstance(stmt, ast.If):
            continue
        test = stmt.test
        if isinstance(test, ast.Name):
            # plain `if TYPE_CHECKING:`
            is_guard = test.id == "TYPE_CHECKING"
        elif isinstance(test, ast.Attribute):
            # `if <name>.TYPE_CHECKING:`
            # some folks do "import typing as t", so the name in front of the dot is not checked;
            # the accuracy with just TYPE_CHECKING is good enough.
            is_guard = isinstance(test.value, ast.Name) and test.attr == "TYPE_CHECKING"
        else:
            is_guard = False
        if is_guard:
            guarded.body.extend(stmt.body)
    return guarded
200
201
@cache
def _parse_module(source: str) -> ast.Module:
    """
    Turn the source code of a module into its ast.Module.

    An empty source yields an empty ast.Module.
    """
    module = _parse(source)
    assert isinstance(module, ast.Module)
    return module
212
213
@cache
def _parse_class(source: str) -> ast.ClassDef:
    """
    Turn the source code of a class into its ast.ClassDef.

    An empty source yields an empty ast.ClassDef stub.
    """
    parsed = _parse(source)
    assert len(parsed.body) <= 1
    if not parsed.body:
        return ast.ClassDef(body=[], decorator_list=[])
    node = parsed.body[0]
    assert isinstance(node, ast.ClassDef)
    return node
228
229
@cache
def _parse_function(source: str) -> ast.FunctionDef | ast.AsyncFunctionDef:
    """
    Turn the source code of a (async) function into the matching AST node.

    An empty ast.FunctionDef stub is returned if source is empty or does not
    contain a function definition.
    """
    parsed = _parse(source)
    assert len(parsed.body) <= 1
    if parsed.body:
        node = parsed.body[0]
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            return node
    # Either there is no source, or we have a lambda function;
    # to simplify the API return the ast.FunctionDef stub.
    return ast.FunctionDef(body=[], decorator_list=[])
248
249
def _parse(
    source: str,
) -> ast.Module:
    """
    Dedent `source` with `_dedent` and parse it with `ast.parse`.

    `ast.parse` always produces an `ast.Module`; callers pick the class or function node
    out of its body. If dedenting or parsing fails, the error and the offending source are
    reported through `warnings.warn` and an empty module is returned instead.
    """
    try:
        return ast.parse(_dedent(source))
    except Exception as e:
        warnings.warn(f"Error parsing source code: {e}\n===\n{source}\n===")
        return ast.parse("")
258
259
@cache
def _dedent(source: str) -> str:
    """
    Dedent the head of a function or class definition so that it can be parsed by `ast.parse`.
    This is an alternative to `textwrap.dedent`, which does not dedent if there are docstrings
    without indentation. For example, this is valid Python code but would not be dedented with `textwrap.dedent`:

    class Foo:
        def bar(self):
           '''
    this is a docstring
           '''

    Only leading lines are dedented: processing stops at the first line that starts with
    `async `, `def ` or `class `, or that is not indented. The body is left untouched.
    A source without any newline (e.g. a single indented line) is simply stripped of its
    leading whitespace.
    """
    head: list[str] = []
    while source and source[0] in (" ", "\t"):
        source = source.lstrip()
        # we may have decorators before our function definition, in which case we need to dedent a few more lines.
        # the following heuristic should be good enough to detect if we have reached the definition.
        # it's easy to produce examples where this fails, but this probably is not a problem in practice.
        if source.startswith(("async ", "def ", "class ")):
            break
        # partition (unlike a two-way split) also copes with a last line that has no newline.
        first_line, newline, source = source.partition("\n")
        head.append(first_line + newline)
    return "".join(head) + source
284
285
@cache
def _nodes(tree: ast.Module | ast.ClassDef) -> list[ast.AST]:
    """
    Lists every node in the body of `tree`, with the body of `__init__` inlined right after it.

    That way all variables declared in a class are found, including those that are only
    assigned in the constructor.
    """
    return [*_nodes_iter(tree)]
294
295
def _nodes_iter(tree: ast.Module | ast.ClassDef) -> Iterator[ast.AST]:
    """Yield each node of `tree.body`; an `__init__` function is followed by its inlined body."""
    for node in tree.body:
        yield node
        is_constructor = isinstance(node, ast.FunctionDef) and node.name == "__init__"
        if is_constructor:
            yield from _init_nodes(node)
301
302
def _is_str_literal(node: ast.AST) -> bool:
    """Return True if `node` is a string literal."""
    if isinstance(node, ast.Constant):
        return isinstance(node.value, str)
    # Python <= 3.7 parses string literals into ast.Str nodes. The class is matched by name
    # because `ast.Str` is deprecated and no longer exists on recent Python versions, so
    # touching the attribute would raise AttributeError there.
    return type(node).__name__ == "Str"


def _init_nodes(tree: ast.FunctionDef) -> Iterator[ast.AST]:
    """
    Transform attribute assignments like "self.foo = 42" to name assignments like "foo = 42",
    keep all string literal expression statements, and replace everything else with a no-op
    (`ast.Pass`).
    This essentially allows us to inline __init__ when parsing a class definition.

    Exactly one node is yielded per statement in the body of `tree`, so a string literal
    still directly follows the assignment it documents.
    """
    for a in tree.body:
        if (
            isinstance(a, ast.AnnAssign)
            and isinstance(a.target, ast.Attribute)
            and isinstance(a.target.value, ast.Name)
            and a.target.value.id == "self"
        ):
            yield ast.AnnAssign(
                ast.Name(a.target.attr), a.annotation, a.value, simple=1
            )
        elif (
            isinstance(a, ast.Assign)
            and len(a.targets) == 1
            and isinstance(a.targets[0], ast.Attribute)
            and isinstance(a.targets[0].value, ast.Name)
            and a.targets[0].value.id == "self"
        ):
            yield ast.Assign(
                [ast.Name(a.targets[0].attr)],
                value=a.value,
                # not available on Python 3.7
                type_comment=getattr(a, "type_comment", None),
            )
        elif isinstance(a, ast.Expr) and _is_str_literal(a.value):
            yield a
        else:
            yield ast.Pass()
345
346
def _pairwise_longest(iterable: Iterable[T]) -> Iterable[tuple[T, T]]:
    """
    Pair every item with its successor; the final item is paired with None.

    s -> (s0,s1), (s1,s2), (s2, s3),  ..., (sN, None)
    """
    current, upcoming = tee(iterable)
    # Move the second copy one item ahead; the default keeps an empty input from raising.
    next(upcoming, None)
    return zip_longest(current, upcoming)
def get_source(obj: Any) -> str:
def get_source(obj: Any) -> str:
    """
    Return the source code of the Python object `obj` as a string.

    The lookup goes through the cached `_get_source` helper.
    If the source cannot be retrieved, an empty string is returned.
    """
    try:
        source = _get_source(obj)
    except TypeError:
        # Some objects are not hashable and therefore cannot be used as a cache key;
        # for those we call the undecorated function directly.
        source = _get_source.__wrapped__(obj)
    return source

Returns the source code of the Python object obj as a str. This tries to first unwrap the method if it is wrapped and then calls inspect.getsource.

If this fails, an empty string is returned.

def parse(obj):
def parse(obj):
    """
    Return the (unwrapped) AST node for a module, class or function.

    When no source code can be found for `obj`, an empty stub node of the matching kind
    is returned, so the result can always be walked.
    """
    source = get_source(obj)
    if isinstance(obj, types.ModuleType):
        return _parse_module(source)
    if isinstance(obj, type):
        return _parse_class(source)
    # Everything that is neither a module nor a class is parsed as a function.
    return _parse_function(source)

Parse a module, class or function and return the (unwrapped) AST node. If an object's source code cannot be found, this function returns an empty ast node stub which can still be walked.

@cache
def unparse(tree: ast.AST):
@cache
def unparse(tree: ast.AST):
    """A memoized `ast.unparse`: calling it again with the same node reuses the earlier result."""
    source = ast_unparse(tree)
    return source

ast.unparse, but cached.

@dataclass
class AstInfo:
@dataclass
class AstInfo:
    """Container for what was collected while walking a syntax tree."""

    docstrings: dict[str, str]
    """Maps a qualname to its docstring."""
    annotations: dict[str, str]
    """Maps a qualname to its annotation.

    This module never evaluates annotations; they are kept as plain strings."""

The information extracted from walking the syntax tree.

AstInfo(docstrings: dict[str, str], annotations: dict[str, str])
docstrings: dict[str, str]

A qualname -> docstring mapping.

annotations: dict[str, str]

A qualname -> annotation mapping.

Annotations are not evaluated by this module and only returned as strings.

def walk_tree(obj: module | type) -> pdoc.doc_ast.AstInfo:
def walk_tree(obj: types.ModuleType | type) -> AstInfo:
    """
    Parse `obj` into its abstract syntax tree, walk it, and return the information found.
    """
    tree = parse(obj)
    return _walk_tree(tree)

Walks the abstract syntax tree for obj and returns the extracted information.

def sort_by_source( obj: module | type, sorted: dict[str, ~T], unsorted: dict[str, ~T]) -> tuple[dict[str, ~T], dict[str, ~T]]:
def sort_by_source(
    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
) -> tuple[dict[str, T], dict[str, T]]:
    """
    Moves items from `unsorted` into `sorted`, following the order in which their names appear in the source code
    of `obj`. `__init__` is the one exception: if present, it is always moved first.

    Items whose name is not found in the source (for example because they've been inherited from a superclass)
    stay in `unsorted` untouched.

    Returns a `(sorted, not found)` tuple. Both input dictionaries are modified in place.
    """
    tree = parse(obj)

    if "__init__" in unsorted:
        sorted["__init__"] = unsorted.pop("__init__")

    for node in _nodes(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = node.name
        elif isinstance(node, ast.AnnAssign):
            # only simple annotated assignments to a plain name count
            if not (isinstance(node.target, ast.Name) and node.simple):
                continue
            name = node.target.id
        elif isinstance(node, ast.Assign):
            # only assignments with exactly one plain-name target count
            if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name):
                continue
            name = node.targets[0].id
        else:
            continue

        if name in unsorted:
            sorted[name] = unsorted.pop(name)
    return sorted, unsorted

Takes items from unsorted and inserts them into sorted in order of appearance in the source code of obj. The only exception to this rule is __init__, which (if present) is always inserted first.

Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.

Returns a (sorted, not found) tuple.

def type_checking_sections(mod: module) -> ast.Module:
def type_checking_sections(mod: types.ModuleType) -> ast.Module:
    """
    Collects the bodies of all top-level `if TYPE_CHECKING:` / `if <name>.TYPE_CHECKING:` blocks of `mod`
    into a new `ast.Module` and returns it.
    """
    guarded = ast.Module(body=[], type_ignores=[])
    for stmt in _parse_module(get_source(mod)).body:
        if not isinstance(stmt, ast.If):
            continue
        test = stmt.test
        if isinstance(test, ast.Name):
            # plain `if TYPE_CHECKING:`
            is_guard = test.id == "TYPE_CHECKING"
        elif isinstance(test, ast.Attribute):
            # `if <name>.TYPE_CHECKING:`
            # some folks do "import typing as t", so the name in front of the dot is not checked;
            # the accuracy with just TYPE_CHECKING is good enough.
            is_guard = isinstance(test.value, ast.Name) and test.attr == "TYPE_CHECKING"
        else:
            is_guard = False
        if is_guard:
            guarded.body.extend(stmt.body)
    return guarded

Walks the abstract syntax tree for mod and returns all statements guarded by TYPE_CHECKING blocks.