Edit on GitHub

pdoc.doc_ast

This module handles all interpretation of the Abstract Syntax Tree (AST) in pdoc.

Parsing the AST is done to extract docstrings, type annotations, and variable declarations from __init__.

  1"""
  2This module handles all interpretation of the *Abstract Syntax Tree (AST)* in pdoc.
  3
  4Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`.
  5"""
  6from __future__ import annotations
  7
  8import ast
  9import inspect
 10import types
 11import warnings
 12from collections.abc import Iterable
 13from collections.abc import Iterator
 14from dataclasses import dataclass
 15from itertools import tee
 16from itertools import zip_longest
 17from typing import Any
 18from typing import overload
 19from typing import TypeVar
 20
 21import pdoc
 22from ._compat import ast_unparse
 23from ._compat import cache
 24
 25
 26def get_source(obj: Any) -> str:
 27    """
 28    Returns the source code of the Python object `obj` as a str.
 29    This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`.
 30
 31    If this fails, an empty string is returned.
 32    """
 33    # Some objects may not be hashable, so we fall back to the non-cached version if that is the case.
 34    try:
 35        return _get_source(obj)
 36    except TypeError:
 37        return _get_source.__wrapped__(obj)
 38
 39
 40@cache
 41def _get_source(obj: Any) -> str:
 42    try:
 43        return inspect.getsource(obj)
 44    except Exception:
 45        return ""
 46
 47
 48@overload
 49def parse(obj: types.ModuleType) -> ast.Module:
 50    ...
 51
 52
 53@overload
 54def parse(obj: types.FunctionType) -> ast.FunctionDef | ast.AsyncFunctionDef:
 55    ...
 56
 57
 58@overload
 59def parse(obj: type) -> ast.ClassDef:
 60    ...
 61
 62
 63def parse(obj):
 64    """
 65    Parse a module, class or function and return the (unwrapped) AST node.
 66    If an object's source code cannot be found, this function returns an empty ast node stub
 67    which can still be walked.
 68    """
 69    src = get_source(obj)
 70    if isinstance(obj, types.ModuleType):
 71        return _parse_module(src)
 72    elif isinstance(obj, type):
 73        return _parse_class(src)
 74    else:
 75        return _parse_function(src)
 76
 77
 78@cache
 79def unparse(tree: ast.AST):
 80    """`ast.unparse`, but cached."""
 81    return ast_unparse(tree)
 82
 83
@dataclass
class AstInfo:
    """
    The information extracted from walking the syntax tree.

    Instances are produced by `_walk_tree`, which fills the three mappings while
    iterating over the statements of a module or class body (including the
    inlined body of `__init__`).
    """

    var_docstrings: dict[str, str]
    """A qualname -> docstring mapping."""
    func_docstrings: dict[str, str]
    """A qualname -> docstring mapping for functions."""
    # NOTE: for plain assignments without an annotation, `_walk_tree` stores
    # `pdoc.doc_types.empty` as the value instead of an annotation string.
    annotations: dict[str, str]
    """A qualname -> annotation mapping.
    
    Annotations are not evaluated by this module and only returned as strings."""
 96
 97
 98def walk_tree(obj: types.ModuleType | type) -> AstInfo:
 99    """
100    Walks the abstract syntax tree for `obj` and returns the extracted information.
101    """
102    return _walk_tree(parse(obj))
103
104
def _str_expr_value(node: ast.AST | None) -> str | None:
    """
    Return the text of `node` if it is a bare string-literal statement, else `None`.

    This deliberately never touches `ast.Str`: that alias is deprecated and
    is not available on newer Python versions.
    """
    if not isinstance(node, ast.Expr):
        return None
    value = node.value
    if isinstance(value, ast.Constant):
        return value.value if isinstance(value.value, str) else None
    if type(value).__name__ == "Str":  # pragma: no cover
        # Python <= 3.7 represents string literals as `ast.Str` nodes.
        # Matching on the class name avoids accessing the deprecated attribute.
        return value.s
    return None


@cache
def _walk_tree(
    tree: ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef,
) -> AstInfo:
    """
    Collect variable docstrings, function docstrings and annotations from `tree`.

    Statements are inspected pairwise: `a` is the current statement and `b` the one
    following it (`None` after the last statement). A string literal in `b` directly
    after a variable assignment in `a` is recorded as that variable's docstring.
    """
    var_docstrings = {}
    func_docstrings = {}
    annotations = {}
    for a, b in _pairwise_longest(_nodes(tree)):
        if isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple:
            name = a.target.id
            annotations[name] = unparse(a.annotation)
        elif (
            isinstance(a, ast.Assign)
            and len(a.targets) == 1
            and isinstance(a.targets[0], ast.Name)
        ):
            name = a.targets[0].id
            # Make sure that all assignments are picked up, even if there is
            # no annotation or docstring.
            annotations.setdefault(name, pdoc.doc_types.empty)
        elif isinstance(a, ast.FunctionDef) and a.body:
            func_doc = _str_expr_value(a.body[0])
            if func_doc is not None:
                func_docstrings[a.name] = inspect.cleandoc(func_doc).strip()
            # Functions never take a docstring from the statement that follows them.
            continue
        else:
            continue
        # Only reached for variable assignments: check whether a docstring follows.
        var_doc = _str_expr_value(b)
        if var_doc is not None:
            var_docstrings[name] = inspect.cleandoc(var_doc).strip()
    return AstInfo(
        var_docstrings,
        func_docstrings,
        annotations,
    )
148
149
T = TypeVar("T")


def sort_by_source(
    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
) -> tuple[dict[str, T], dict[str, T]]:
    """
    Move items from `unsorted` into `sorted`, following the order in which their names
    are defined in the source code of `obj`.
    `__init__` is special-cased: if present, it is always moved first.

    Names that do not appear in the source (for example because they've been inherited
    from a superclass) stay in `unsorted` and are returned as-is.

    Both dictionaries are modified in place.

    Returns a `(sorted, not found)` tuple.
    """
    tree = parse(obj)

    if "__init__" in unsorted:
        sorted["__init__"] = unsorted.pop("__init__")

    for node in _nodes(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = node.name
        elif isinstance(node, ast.AnnAssign):
            # only simple annotated names (`x: int`), not e.g. `self.x: int`
            if not (isinstance(node.target, ast.Name) and node.simple):
                continue
            name = node.target.id
        elif isinstance(node, ast.Assign):
            # only single-target name assignments (`x = ...`)
            if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name):
                continue
            name = node.targets[0].id
        else:
            continue

        if name in unsorted:
            sorted[name] = unsorted.pop(name)
    return sorted, unsorted
188
189
def type_checking_sections(mod: types.ModuleType) -> ast.Module:
    """
    Collect the bodies of all top-level `if TYPE_CHECKING:` / `if <name>.TYPE_CHECKING:`
    blocks of `mod` into a fresh `ast.Module` and return it.
    """
    guarded = ast.Module(body=[], type_ignores=[])
    for stmt in _parse_module(get_source(mod)).body:
        if not isinstance(stmt, ast.If):
            continue
        cond = stmt.test
        if isinstance(cond, ast.Name):
            is_guard = cond.id == "TYPE_CHECKING"
        elif isinstance(cond, ast.Attribute):
            # some folks do "import typing as t", the accuracy with just TYPE_CHECKING is good enough,
            # so the name in front of the dot is not compared against "typing".
            is_guard = isinstance(cond.value, ast.Name) and cond.attr == "TYPE_CHECKING"
        else:
            is_guard = False
        if is_guard:
            guarded.body.extend(stmt.body)
    return guarded
213
214
@cache
def _parse_module(source: str) -> ast.Module:
    """
    Turn the source code of a module into its `ast.Module`.

    For empty source, the resulting `ast.Module` is empty as well.
    """
    module = _parse(source)
    assert isinstance(module, ast.Module)
    return module
225
226
@cache
def _parse_class(source: str) -> ast.ClassDef:
    """
    Parse the AST for the source code of a class and return the ast.ClassDef.

    Returns an empty ast.ClassDef stub (named `PdocStub`) if source is empty
    or could not be parsed.
    """
    tree = _parse(source)
    assert len(tree.body) <= 1
    if not tree.body:
        # Populate all fields explicitly: constructing AST nodes with required fields
        # missing (here: `name`) is deprecated on recent Python versions, and a stub
        # without `.name` would surprise anyone inspecting it.
        return ast.ClassDef(
            name="PdocStub", bases=[], keywords=[], body=[], decorator_list=[]
        )
    node = tree.body[0]
    assert isinstance(node, ast.ClassDef)
    return node
241
242
@cache
def _parse_function(source: str) -> ast.FunctionDef | ast.AsyncFunctionDef:
    """
    Parse the AST for the source code of a (async) function and return the matching AST node.

    Returns an empty ast.FunctionDef stub (named `pdoc_stub`) if source is empty,
    could not be parsed, or does not contain a function definition.
    """
    tree = _parse(source)
    assert len(tree.body) <= 1
    if tree.body:
        node = tree.body[0]
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            return node
        # Otherwise we have a lambda function;
        # to simplify the API we return the ast.FunctionDef stub below.
    # Populate the required `name` and `args` fields explicitly: constructing AST nodes
    # with required fields missing is deprecated on recent Python versions.
    return ast.FunctionDef(
        name="pdoc_stub",
        args=ast.arguments(
            posonlyargs=[], args=[], kwonlyargs=[], kw_defaults=[], defaults=[]
        ),
        body=[],
        decorator_list=[],
    )
261
262
def _parse(
    source: str,
) -> ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef:
    """
    Dedent `source` and parse it with `ast.parse`.

    If anything goes wrong, a warning containing the offending source is emitted
    and the AST of an empty module is returned instead.
    """
    try:
        tree = ast.parse(_dedent(source))
    except Exception as e:
        warnings.warn(f"Error parsing source code: {e}\n" f"===\n" f"{source}\n" f"===")
        tree = ast.parse("")
    return tree
271
272
@cache
def _dedent(source: str) -> str:
    """
    Dedent the head of a function or class definition so that it can be parsed by `ast.parse`.
    This is an alternative to `textwrap.dedent`, which does not dedent if there are docstrings
    without indentation. For example, this is valid Python code but would not be dedented with `textwrap.dedent`:

    class Foo:
        def bar(self):
           '''
    this is a docstring
           '''

    Only the leading lines up to (and including) the `def`/`async def`/`class` line are
    stripped of their indentation; the remainder is returned untouched.
    Source that does not start with a space or tab is returned unchanged.
    """
    head: list[str] = []
    rest = source
    while rest and rest[0] in (" ", "\t"):
        rest = rest.lstrip()
        # we may have decorators before our function definition, in which case we need to dedent a few more lines.
        # the following heuristic should be good enough to detect if we have reached the definition.
        # it's easy to produce examples where this fails, but this probably is not a problem in practice.
        if rest.startswith(("async ", "def ", "class ")):
            break
        # `partition` (unlike unpacking `split("\n", 1)`) does not raise if this is the
        # last line and it has no trailing newline; `sep` is simply empty then.
        line, sep, rest = rest.partition("\n")
        head.append(line + sep)
    return "".join(head) + rest
297
298
@cache
def _nodes(tree: ast.Module | ast.ClassDef) -> list[ast.AST]:
    """
    Materialize `_nodes_iter(tree)` into a list: every node of tree's body, with the
    (transformed) body of __init__ inlined right after the __init__ node itself.

    This is useful to detect all declared variables in a class, even if they only appear in the constructor.
    """
    return [*_nodes_iter(tree)]
307
308
309def _nodes_iter(tree: ast.Module | ast.ClassDef) -> Iterator[ast.AST]:
310    for a in tree.body:
311        yield a
312        if isinstance(a, ast.FunctionDef) and a.name == "__init__":
313            yield from _init_nodes(a)
314
315
def _is_self_attribute(node: ast.AST) -> bool:
    """Return True if `node` is an attribute accessed directly on the name `self`, e.g. `self.foo`."""
    return (
        isinstance(node, ast.Attribute)
        and isinstance(node.value, ast.Name)
        and node.value.id == "self"
    )


def _is_str_literal_expr(node: ast.AST) -> bool:
    """
    Return True if `node` is an expression statement consisting of a string literal.

    This deliberately never touches `ast.Str`: that alias is deprecated and
    is not available on newer Python versions.
    """
    if not isinstance(node, ast.Expr):
        return False
    value = node.value
    if isinstance(value, ast.Constant):
        return isinstance(value.value, str)
    # Python <= 3.7 represents string literals as `ast.Str` nodes.
    # Matching on the class name avoids accessing the deprecated attribute.
    return type(value).__name__ == "Str"


def _init_nodes(tree: ast.FunctionDef) -> Iterator[ast.AST]:
    """
    Transform attribute assignments like "self.foo = 42" to name assignments like "foo = 42",
    keep all constant string expressions, and no-op everything else.
    This essentially allows us to inline __init__ when parsing a class definition.

    Exactly one node is yielded per statement in `tree.body`, so a docstring still
    directly follows the assignment it documents.
    """
    for a in tree.body:
        if isinstance(a, ast.AnnAssign) and _is_self_attribute(a.target):
            yield ast.AnnAssign(
                ast.Name(a.target.attr), a.annotation, a.value, simple=1
            )
        elif (
            isinstance(a, ast.Assign)
            and len(a.targets) == 1
            and _is_self_attribute(a.targets[0])
        ):
            yield ast.Assign(
                [ast.Name(a.targets[0].attr)],
                value=a.value,
                # not available on Python 3.7
                type_comment=getattr(a, "type_comment", None),
            )
        elif _is_str_literal_expr(a):
            yield a
        else:
            # Placeholder, so that unrelated statements separate assignments from
            # string literals that do not belong to them.
            yield ast.Pass()
358
359
def _pairwise_longest(iterable: Iterable[T]) -> Iterable[tuple[T, T]]:
    """
    Pair every item with its successor; the last item is paired with None:

    s -> (s0,s1), (s1,s2), (s2, s3),  ..., (sN, None)
    """
    current, following = tee(iterable, 2)
    # advance the second iterator by one so that it runs one item ahead
    next(following, None)
    pairs = zip_longest(current, following)
    return pairs
def get_source(obj: Any) -> str:
27def get_source(obj: Any) -> str:
28    """
29    Returns the source code of the Python object `obj` as a str.
30    This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`.
31
32    If this fails, an empty string is returned.
33    """
34    # Some objects may not be hashable, so we fall back to the non-cached version if that is the case.
35    try:
36        return _get_source(obj)
37    except TypeError:
38        return _get_source.__wrapped__(obj)

Returns the source code of the Python object obj as a str. This tries to first unwrap the method if it is wrapped and then calls inspect.getsource.

If this fails, an empty string is returned.

def parse(obj):
64def parse(obj):
65    """
66    Parse a module, class or function and return the (unwrapped) AST node.
67    If an object's source code cannot be found, this function returns an empty ast node stub
68    which can still be walked.
69    """
70    src = get_source(obj)
71    if isinstance(obj, types.ModuleType):
72        return _parse_module(src)
73    elif isinstance(obj, type):
74        return _parse_class(src)
75    else:
76        return _parse_function(src)

Parse a module, class or function and return the (unwrapped) AST node. If an object's source code cannot be found, this function returns an empty ast node stub which can still be walked.

@cache
def unparse(tree: ast.AST):
79@cache
80def unparse(tree: ast.AST):
81    """`ast.unparse`, but cached."""
82    return ast_unparse(tree)

ast.unparse, but cached.

@dataclass
class AstInfo:
85@dataclass
86class AstInfo:
87    """The information extracted from walking the syntax tree."""
88
89    var_docstrings: dict[str, str]
90    """A qualname -> docstring mapping."""
91    func_docstrings: dict[str, str]
92    """A qualname -> docstring mapping for functions."""
93    annotations: dict[str, str]
94    """A qualname -> annotation mapping.
95    
96    Annotations are not evaluated by this module and only returned as strings."""

The information extracted from walking the syntax tree.

AstInfo( var_docstrings: dict[str, str], func_docstrings: dict[str, str], annotations: dict[str, str])
var_docstrings: dict[str, str]

A qualname -> docstring mapping.

func_docstrings: dict[str, str]

A qualname -> docstring mapping for functions.

annotations: dict[str, str]

A qualname -> annotation mapping.

Annotations are not evaluated by this module and only returned as strings.

def walk_tree(obj: module | type) -> pdoc.doc_ast.AstInfo:
 99def walk_tree(obj: types.ModuleType | type) -> AstInfo:
100    """
101    Walks the abstract syntax tree for `obj` and returns the extracted information.
102    """
103    return _walk_tree(parse(obj))

Walks the abstract syntax tree for obj and returns the extracted information.

def sort_by_source( obj: module | type, sorted: dict[str, ~T], unsorted: dict[str, ~T]) -> tuple[dict[str, ~T], dict[str, ~T]]:
154def sort_by_source(
155    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
156) -> tuple[dict[str, T], dict[str, T]]:
157    """
158    Takes items from `unsorted` and inserts them into `sorted` in order of appearance in the source code of `obj`.
159    The only exception to this rule is `__init__`, which (if present) is always inserted first.
160
161    Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.
162
163    Returns a `(sorted, not found)` tuple.
164    """
165    tree = parse(obj)
166
167    if "__init__" in unsorted:
168        sorted["__init__"] = unsorted.pop("__init__")
169
170    for a in _nodes(tree):
171        if (
172            isinstance(a, ast.Assign)
173            and len(a.targets) == 1
174            and isinstance(a.targets[0], ast.Name)
175        ):
176            name = a.targets[0].id
177        elif (
178            isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple
179        ):
180            name = a.target.id
181        elif isinstance(a, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
182            name = a.name
183        else:
184            continue
185
186        if name in unsorted:
187            sorted[name] = unsorted.pop(name)
188    return sorted, unsorted

Takes items from unsorted and inserts them into sorted in order of appearance in the source code of obj. The only exception to this rule is __init__, which (if present) is always inserted first.

Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.

Returns a (sorted, not found) tuple.

def type_checking_sections(mod: module) -> ast.Module:
191def type_checking_sections(mod: types.ModuleType) -> ast.Module:
192    """
193    Walks the abstract syntax tree for `mod` and returns all statements guarded by TYPE_CHECKING blocks.
194    """
195    ret = ast.Module(body=[], type_ignores=[])
196    tree = _parse_module(get_source(mod))
197    for node in tree.body:
198        if (
199            isinstance(node, ast.If)
200            and isinstance(node.test, ast.Name)
201            and node.test.id == "TYPE_CHECKING"
202        ):
203            ret.body.extend(node.body)
204        if (
205            isinstance(node, ast.If)
206            and isinstance(node.test, ast.Attribute)
207            and isinstance(node.test.value, ast.Name)
208            # some folks do "import typing as t", the accuracy with just TYPE_CHECKING is good enough.
209            # and node.test.value.id == "typing"
210            and node.test.attr == "TYPE_CHECKING"
211        ):
212            ret.body.extend(node.body)
213    return ret

Walks the abstract syntax tree for mod and returns all statements guarded by TYPE_CHECKING blocks.