Edit on GitHub

pdoc.doc_ast

This module handles all interpretation of the Abstract Syntax Tree (AST) in pdoc.

Parsing the AST is done to extract docstrings, type annotations, and variable declarations from __init__.

  1"""
  2This module handles all interpretation of the *Abstract Syntax Tree (AST)* in pdoc.
  3
  4Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`.
  5"""
  6from __future__ import annotations
  7
  8import ast
  9import inspect
 10import types
 11import warnings
 12from collections.abc import Iterable, Iterator
 13from dataclasses import dataclass
 14from itertools import tee, zip_longest
 15from typing import Any, TypeVar, overload
 16
 17from ._compat import ast_unparse, cache
 18
 19
 20def get_source(obj: Any) -> str:
 21    """
 22    Returns the source code of the Python object `obj` as a str.
 23    This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`.
 24
 25    If this fails, an empty string is returned.
 26    """
 27    # Some objects may not be hashable, so we fall back to the non-cached version if that is the case.
 28    try:
 29        return _get_source(obj)
 30    except TypeError:
 31        return _get_source.__wrapped__(obj)
 32
 33
 34@cache
 35def _get_source(obj: Any) -> str:
 36    try:
 37        return inspect.getsource(obj)
 38    except Exception:
 39        return ""
 40
 41
 42@overload
 43def parse(obj: types.ModuleType) -> ast.Module:
 44    ...
 45
 46
 47@overload
 48def parse(obj: types.FunctionType) -> ast.FunctionDef | ast.AsyncFunctionDef:
 49    ...
 50
 51
 52@overload
 53def parse(obj: type) -> ast.ClassDef:
 54    ...
 55
 56
 57def parse(obj):
 58    """
 59    Parse a module, class or function and return the (unwrapped) AST node.
 60    If an object's source code cannot be found, this function returns an empty ast node stub
 61    which can still be walked.
 62    """
 63    src = get_source(obj)
 64    if isinstance(obj, types.ModuleType):
 65        return _parse_module(src)
 66    elif isinstance(obj, type):
 67        return _parse_class(src)
 68    else:
 69        return _parse_function(src)
 70
 71
 72@cache
 73def unparse(tree: ast.AST):
 74    """`ast.unparse`, but cached."""
 75    return ast_unparse(tree)
 76
 77
@dataclass
class AstInfo:
    """
    The information extracted from walking the syntax tree.

    Instances are built by `_walk_tree`. Both mappings are keyed by the name of the
    assigned variable as it appears in the walked tree.
    """

    docstrings: dict[str, str]
    """A qualname -> docstring mapping."""
    annotations: dict[str, str]
    """A qualname -> annotation mapping.
    
    Annotations are not evaluated by this module and only returned as strings."""
 88
 89
 90def walk_tree(obj: types.ModuleType | type) -> AstInfo:
 91    """
 92    Walks the abstract syntax tree for `obj` and returns the extracted information.
 93    """
 94    return _walk_tree(parse(obj))
 95
 96
 97@cache
 98def _walk_tree(
 99    tree: ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef,
100) -> AstInfo:
101    docstrings = {}
102    annotations = {}
103    for a, b in _pairwise_longest(_nodes(tree)):
104        if isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple:
105            name = a.target.id
106            annotations[name] = unparse(a.annotation)
107        elif (
108            isinstance(a, ast.Assign)
109            and len(a.targets) == 1
110            and isinstance(a.targets[0], ast.Name)
111        ):
112            name = a.targets[0].id
113        else:
114            continue
115        if (
116            isinstance(b, ast.Expr)
117            and isinstance(b.value, ast.Constant)
118            and isinstance(b.value.value, str)
119        ):
120            docstrings[name] = inspect.cleandoc(b.value.value).strip()
121        elif isinstance(b, ast.Expr) and isinstance(
122            b.value, ast.Str
123        ):  # pragma: no cover
124            # Python <= 3.7
125            docstrings[name] = inspect.cleandoc(b.value.s).strip()
126    return AstInfo(
127        docstrings,
128        annotations,
129    )
130
131
# Generic type variable used in the signatures of `sort_by_source` and `_pairwise_longest`.
T = TypeVar("T")
133
134
def sort_by_source(
    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
) -> tuple[dict[str, T], dict[str, T]]:
    """
    Move items from `unsorted` into `sorted`, following the order in which their names are
    defined in the source code of `obj`. `__init__` is special-cased: if present, it is always
    moved first.

    Both dictionaries are modified in place. Names that never show up in the source (for
    example because they've been inherited from a superclass) stay in `unsorted` untouched.

    Returns a `(sorted, not found)` tuple.
    """
    tree = parse(obj)

    if "__init__" in unsorted:
        sorted["__init__"] = unsorted.pop("__init__")

    for node in _nodes(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = node.name
        elif isinstance(node, ast.AnnAssign):
            # only simple annotated names, e.g. `x: int = 1`
            if not (isinstance(node.target, ast.Name) and node.simple):
                continue
            name = node.target.id
        elif isinstance(node, ast.Assign):
            # only plain single-target assignments, e.g. `x = 1`
            if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name):
                continue
            name = node.targets[0].id
        else:
            continue

        if name in unsorted:
            sorted[name] = unsorted.pop(name)

    return sorted, unsorted
170
171
def type_checking_sections(mod: types.ModuleType) -> ast.Module:
    """
    Collect all statements of `mod` that sit directly inside a top-level
    `if TYPE_CHECKING:` or `if <name>.TYPE_CHECKING:` block.

    The statements are returned as the body of a fresh `ast.Module`.
    """
    guarded = ast.Module(body=[], type_ignores=[])
    for stmt in _parse_module(get_source(mod)).body:
        if not isinstance(stmt, ast.If):
            continue
        test = stmt.test
        if isinstance(test, ast.Name):
            # if TYPE_CHECKING:
            is_guard = test.id == "TYPE_CHECKING"
        elif isinstance(test, ast.Attribute):
            # if typing.TYPE_CHECKING:
            # some folks do "import typing as t", so the module name is deliberately not checked;
            # the accuracy with just TYPE_CHECKING is good enough.
            is_guard = isinstance(test.value, ast.Name) and test.attr == "TYPE_CHECKING"
        else:
            is_guard = False
        if is_guard:
            guarded.body.extend(stmt.body)
    return guarded
195
196
@cache
def _parse_module(source: str) -> ast.Module:
    """
    Parse the source code of a module into an `ast.Module`.

    An empty `source` yields an empty `ast.Module`.
    """
    module = _parse(source)
    assert isinstance(module, ast.Module)
    return module
207
208
@cache
def _parse_class(source: str) -> ast.ClassDef:
    """
    Parse the source code of a class and return its `ast.ClassDef` node.

    An empty `source` yields an empty `ast.ClassDef` stub.
    """
    tree = _parse(source)
    assert len(tree.body) <= 1
    if not tree.body:
        # nothing could be parsed: hand out a stub that can still be walked
        return ast.ClassDef(body=[], decorator_list=[])
    class_def = tree.body[0]
    assert isinstance(class_def, ast.ClassDef)
    return class_def
223
224
@cache
def _parse_function(source: str) -> ast.FunctionDef | ast.AsyncFunctionDef:
    """
    Parse the source code of a (async) function and return the matching AST node.

    An empty `ast.FunctionDef` stub is returned if `source` is empty or if its single
    statement is not a function definition (e.g. a lambda function).
    """
    tree = _parse(source)
    assert len(tree.body) <= 1
    first = tree.body[0] if tree.body else None
    if isinstance(first, (ast.FunctionDef, ast.AsyncFunctionDef)):
        return first
    # Either there is no source or we have a lambda function;
    # to simplify the API return the ast.FunctionDef stub.
    return ast.FunctionDef(body=[], decorator_list=[])
243
244
def _parse(
    source: str,
) -> ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef:
    """
    Dedent `source` and parse it with `ast.parse`.

    If dedenting or parsing fails, a warning containing the error and the offending source
    is emitted and the result of parsing an empty string is returned instead.
    """
    try:
        tree = ast.parse(_dedent(source))
    except Exception as e:
        warnings.warn(f"Error parsing source code: {e}\n" f"===\n" f"{source}\n" f"===")
        tree = ast.parse("")
    return tree
253
254
@cache
def _dedent(source: str) -> str:
    """
    Dedent the head of a function or class definition so that it can be parsed by `ast.parse`.
    This is an alternative to `textwrap.dedent`, which does not dedent if there are docstrings
    without indentation. For example, this is valid Python code but would not be dedented with `textwrap.dedent`:

    class Foo:
        def bar(self):
           '''
    this is a docstring
           '''

    Only leading lines are dedented: decorator lines and the `def`/`async def`/`class` line itself.
    Source that does not start with a space or tab is returned unchanged. Source without any
    definition line (for example a single indented lambda assignment) is dedented line by line
    until the text is exhausted.
    """
    if not source or source[0] not in (" ", "\t"):
        return source
    source = source.lstrip()
    # we may have decorators before our function definition, in which case we need to dedent a few more lines.
    # the following heuristic should be good enough to detect if we have reached the definition.
    # it's easy to produce examples where this fails, but this probably is not a problem in practice.
    if source.startswith(("async ", "def ", "class ")):
        return source
    # str.partition never fails, whereas unpacking `source.split("\n", 1)` raises a ValueError
    # if the remaining text consists of a single line without a newline.
    first_line, newline, rest = source.partition("\n")
    return first_line + newline + _dedent(rest)
279
280
@cache
def _nodes(tree: ast.Module | ast.ClassDef) -> list[ast.AST]:
    """
    Return all nodes in the body of `tree` as a list, with the body of `__init__` inlined
    right after the `__init__` node itself.

    Inlining the constructor makes it possible to detect all declared variables of a class,
    even if they only appear in `__init__`.
    """
    return [*_nodes_iter(tree)]
289
290
def _nodes_iter(tree: ast.Module | ast.ClassDef) -> Iterator[ast.AST]:
    """
    Yield every node in the body of `tree`; a (non-async) function named `__init__` is
    additionally followed by its inlined body as produced by `_init_nodes`.
    """
    for node in tree.body:
        yield node
        if not isinstance(node, ast.FunctionDef):
            continue
        if node.name == "__init__":
            yield from _init_nodes(node)
296
297
def _init_nodes(tree: ast.FunctionDef) -> Iterator[ast.AST]:
    """
    Transform attribute assignments like "self.foo = 42" to name assignments like "foo = 42",
    keep all constant string expressions (potential docstrings), and no-op everything else.
    This essentially allows us to inline __init__ when parsing a class definition.

    Exactly one node is yielded per statement in the body of `tree`, so that the relative
    order of assignments and the string expressions following them is preserved.
    """
    # Local import: only needed to gate the legacy Python 3.7 check below.
    import sys

    for a in tree.body:
        if (
            isinstance(a, ast.AnnAssign)
            and isinstance(a.target, ast.Attribute)
            and isinstance(a.target.value, ast.Name)
            and a.target.value.id == "self"
        ):
            # self.foo: T = value  ->  foo: T = value
            yield ast.AnnAssign(
                ast.Name(a.target.attr), a.annotation, a.value, simple=1
            )
        elif (
            isinstance(a, ast.Assign)
            and len(a.targets) == 1
            and isinstance(a.targets[0], ast.Attribute)
            and isinstance(a.targets[0].value, ast.Name)
            and a.targets[0].value.id == "self"
        ):
            # self.foo = value  ->  foo = value
            yield ast.Assign(
                [ast.Name(a.targets[0].attr)],
                value=a.value,
                # not available on Python 3.7
                type_comment=getattr(a, "type_comment", None),
            )
        elif isinstance(a, ast.Expr) and (
            (isinstance(a.value, ast.Constant) and isinstance(a.value.value, str))
            # Python <= 3.7 parses string literals as ast.Str. The version check must come
            # first: ast.Str emits a DeprecationWarning on access since Python 3.12 and no longer
            # exists on Python 3.14, and this condition is evaluated for every other
            # expression statement (e.g. `super().__init__()`).
            or (
                sys.version_info < (3, 8) and isinstance(a.value, ast.Str)
            )  # pragma: no cover
        ):
            yield a
        else:
            yield ast.Pass()
340
341
def _pairwise_longest(iterable: Iterable[T]) -> Iterable[tuple[T, T]]:
    """
    Pair every item with its successor; the last item is paired with `None`:

    s -> (s0,s1), (s1,s2), (s2, s3),  ..., (sN, None)
    """
    current, upcoming = tee(iterable, 2)
    # Advance the second copy by one item so that it always runs one step ahead.
    next(upcoming, None)
    return zip_longest(current, upcoming, fillvalue=None)
def get_source(obj: Any) -> str:
21def get_source(obj: Any) -> str:
22    """
23    Returns the source code of the Python object `obj` as a str.
24    This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`.
25
26    If this fails, an empty string is returned.
27    """
28    # Some objects may not be hashable, so we fall back to the non-cached version if that is the case.
29    try:
30        return _get_source(obj)
31    except TypeError:
32        return _get_source.__wrapped__(obj)

Returns the source code of the Python object obj as a str. This tries to first unwrap the method if it is wrapped and then calls inspect.getsource.

If this fails, an empty string is returned.

def parse(obj)
58def parse(obj):
59    """
60    Parse a module, class or function and return the (unwrapped) AST node.
61    If an object's source code cannot be found, this function returns an empty ast node stub
62    which can still be walked.
63    """
64    src = get_source(obj)
65    if isinstance(obj, types.ModuleType):
66        return _parse_module(src)
67    elif isinstance(obj, type):
68        return _parse_class(src)
69    else:
70        return _parse_function(src)

Parse a module, class or function and return the (unwrapped) AST node. If an object's source code cannot be found, this function returns an empty ast node stub which can still be walked.

@cache
def unparse(tree: ast.AST)
73@cache
74def unparse(tree: ast.AST):
75    """`ast.unparse`, but cached."""
76    return ast_unparse(tree)

ast.unparse, but cached.

@dataclass
class AstInfo:
79@dataclass
80class AstInfo:
81    """The information extracted from walking the syntax tree."""
82
83    docstrings: dict[str, str]
84    """A qualname -> docstring mapping."""
85    annotations: dict[str, str]
86    """A qualname -> annotation mapping.
87    
88    Annotations are not evaluated by this module and only returned as strings."""

The information extracted from walking the syntax tree.

AstInfo(docstrings: dict[str, str], annotations: dict[str, str])
docstrings: dict[str, str]

A qualname -> docstring mapping.

annotations: dict[str, str]

A qualname -> annotation mapping.

Annotations are not evaluated by this module and only returned as strings.

def walk_tree(obj: module | type) -> pdoc.doc_ast.AstInfo:
91def walk_tree(obj: types.ModuleType | type) -> AstInfo:
92    """
93    Walks the abstract syntax tree for `obj` and returns the extracted information.
94    """
95    return _walk_tree(parse(obj))

Walks the abstract syntax tree for obj and returns the extracted information.

def sort_by_source( obj: module | type, sorted: dict[str, ~T], unsorted: dict[str, ~T]) -> tuple[dict[str, ~T], dict[str, ~T]]:
136def sort_by_source(
137    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
138) -> tuple[dict[str, T], dict[str, T]]:
139    """
140    Takes items from `unsorted` and inserts them into `sorted` in order of appearance in the source code of `obj`.
141    The only exception to this rule is `__init__`, which (if present) is always inserted first.
142
143    Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.
144
145    Returns a `(sorted, not found)` tuple.
146    """
147    tree = parse(obj)
148
149    if "__init__" in unsorted:
150        sorted["__init__"] = unsorted.pop("__init__")
151
152    for a in _nodes(tree):
153        if (
154            isinstance(a, ast.Assign)
155            and len(a.targets) == 1
156            and isinstance(a.targets[0], ast.Name)
157        ):
158            name = a.targets[0].id
159        elif (
160            isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple
161        ):
162            name = a.target.id
163        elif isinstance(a, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
164            name = a.name
165        else:
166            continue
167
168        if name in unsorted:
169            sorted[name] = unsorted.pop(name)
170    return sorted, unsorted

Takes items from unsorted and inserts them into sorted in order of appearance in the source code of obj. The only exception to this rule is __init__, which (if present) is always inserted first.

Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.

Returns a (sorted, not found) tuple.

def type_checking_sections(mod: module) -> ast.Module:
173def type_checking_sections(mod: types.ModuleType) -> ast.Module:
174    """
175    Walks the abstract syntax tree for `mod` and returns all statements guarded by TYPE_CHECKING blocks.
176    """
177    ret = ast.Module(body=[], type_ignores=[])
178    tree = _parse_module(get_source(mod))
179    for node in tree.body:
180        if (
181            isinstance(node, ast.If)
182            and isinstance(node.test, ast.Name)
183            and node.test.id == "TYPE_CHECKING"
184        ):
185            ret.body.extend(node.body)
186        if (
187            isinstance(node, ast.If)
188            and isinstance(node.test, ast.Attribute)
189            and isinstance(node.test.value, ast.Name)
190            # some folks do "import typing as t", the accuracy with just TYPE_CHECKING is good enough.
191            # and node.test.value.id == "typing"
192            and node.test.attr == "TYPE_CHECKING"
193        ):
194            ret.body.extend(node.body)
195    return ret

Walks the abstract syntax tree for mod and returns all statements guarded by TYPE_CHECKING blocks.